From 597c5430001c4a013594814bb34c78bde36f689c Mon Sep 17 00:00:00 2001 From: Achal Shah Date: Tue, 5 Apr 2022 10:48:23 -0700 Subject: [PATCH] fix: Fix DataSource constructor to unbreak custom data sources (#2492) * fix: Fix DataSource constructor to unbreak custom data sources Signed-off-by: Achal Shah * fix first party refernces to use kwargs only Signed-off-by: Achal Shah * remove force kwargs Signed-off-by: Achal Shah --- sdk/python/feast/data_source.py | 49 +++++++++++++------ .../infra/offline_stores/bigquery_source.py | 10 ++-- .../spark_offline_store/spark_source.py | 10 ++-- .../feast/infra/offline_stores/file_source.py | 10 ++-- .../infra/offline_stores/redshift_source.py | 10 ++-- .../infra/offline_stores/snowflake_source.py | 10 ++-- 6 files changed, 60 insertions(+), 39 deletions(-) diff --git a/sdk/python/feast/data_source.py b/sdk/python/feast/data_source.py index f23b1771e1..f8a28bf3a2 100644 --- a/sdk/python/feast/data_source.py +++ b/sdk/python/feast/data_source.py @@ -14,6 +14,7 @@ import enum +import warnings from abc import ABC, abstractmethod from typing import Any, Callable, Dict, Iterable, Optional, Tuple @@ -160,14 +161,34 @@ class DataSource(ABC): def __init__( self, - name: str, event_timestamp_column: Optional[str] = None, created_timestamp_column: Optional[str] = None, field_mapping: Optional[Dict[str, str]] = None, date_partition_column: Optional[str] = None, + name: Optional[str] = None, ): - """Creates a DataSource object.""" - self.name = name + """ + Creates a DataSource object. + Args: + name: Name of data source, which should be unique within a project + event_timestamp_column (optional): Event timestamp column used for point in time + joins of feature values. + created_timestamp_column (optional): Timestamp column indicating when the row + was created, used for deduplicating rows. + field_mapping (optional): A dictionary mapping of column names in this data + source to feature names in a feature table or view. Only used for feature + columns, not entity or timestamp columns. + date_partition_column (optional): Timestamp column used for partitioning. + """ + if not name: + warnings.warn( + ( + "Names for data sources need to be supplied. " + "Data sources without names will no tbe supported after Feast 0.23." + ), + UserWarning, + ) + self.name = name or "" self.event_timestamp_column = ( event_timestamp_column if event_timestamp_column else "" ) @@ -321,11 +342,11 @@ def __init__( date_partition_column: Optional[str] = "", ): super().__init__( - name, - event_timestamp_column, - created_timestamp_column, - field_mapping, - date_partition_column, + event_timestamp_column=event_timestamp_column, + created_timestamp_column=created_timestamp_column, + field_mapping=field_mapping, + date_partition_column=date_partition_column, + name=name, ) self.kafka_options = KafkaOptions( bootstrap_servers=bootstrap_servers, @@ -402,7 +423,7 @@ def __init__( self, name: str, schema: Dict[str, ValueType], ): """Creates a RequestDataSource object.""" - super().__init__(name) + super().__init__(name=name) self.schema = schema def validate(self, config: RepoConfig): @@ -485,11 +506,11 @@ def __init__( date_partition_column: Optional[str] = "", ): super().__init__( - name, - event_timestamp_column, - created_timestamp_column, - field_mapping, - date_partition_column, + name=name, + event_timestamp_column=event_timestamp_column, + created_timestamp_column=created_timestamp_column, + field_mapping=field_mapping, + date_partition_column=date_partition_column, ) self.kinesis_options = KinesisOptions( record_format=record_format, region=region, stream_name=stream_name diff --git a/sdk/python/feast/infra/offline_stores/bigquery_source.py b/sdk/python/feast/infra/offline_stores/bigquery_source.py index 1d797077f0..ddc594e2ac 100644 --- a/sdk/python/feast/infra/offline_stores/bigquery_source.py +++ b/sdk/python/feast/infra/offline_stores/bigquery_source.py @@ -70,11 +70,11 @@ def __init__( ) super().__init__( - _name if _name else "", - event_timestamp_column, - created_timestamp_column, - field_mapping, - date_partition_column, + name=_name if _name else "", + event_timestamp_column=event_timestamp_column, + created_timestamp_column=created_timestamp_column, + field_mapping=field_mapping, + date_partition_column=date_partition_column, ) # Note: Python requires redefining hash in child classes that override __eq__ diff --git a/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark_source.py b/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark_source.py index 3ffdf6eda0..b1cbf6ccd8 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark_source.py +++ b/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark_source.py @@ -49,11 +49,11 @@ def __init__( else: raise DataSourceNoNameException() super().__init__( - _name, - event_timestamp_column, - created_timestamp_column, - field_mapping, - date_partition_column, + name=_name if _name else "", + event_timestamp_column=event_timestamp_column, + created_timestamp_column=created_timestamp_column, + field_mapping=field_mapping, + date_partition_column=date_partition_column, ) warnings.warn( "The spark data source API is an experimental feature in alpha development. " diff --git a/sdk/python/feast/infra/offline_stores/file_source.py b/sdk/python/feast/infra/offline_stores/file_source.py index 756ec2a65e..0760f4b32b 100644 --- a/sdk/python/feast/infra/offline_stores/file_source.py +++ b/sdk/python/feast/infra/offline_stores/file_source.py @@ -58,11 +58,11 @@ def __init__( ) super().__init__( - name if name else path, - event_timestamp_column, - created_timestamp_column, - field_mapping, - date_partition_column, + name=name if name else path, + event_timestamp_column=event_timestamp_column, + created_timestamp_column=created_timestamp_column, + field_mapping=field_mapping, + date_partition_column=date_partition_column, ) # Note: Python requires redefining hash in child classes that override __eq__ diff --git a/sdk/python/feast/infra/offline_stores/redshift_source.py b/sdk/python/feast/infra/offline_stores/redshift_source.py index df42e18910..7a28c23037 100644 --- a/sdk/python/feast/infra/offline_stores/redshift_source.py +++ b/sdk/python/feast/infra/offline_stores/redshift_source.py @@ -63,11 +63,11 @@ def __init__( ) super().__init__( - _name if _name else "", - event_timestamp_column, - created_timestamp_column, - field_mapping, - date_partition_column, + name=_name if _name else "", + event_timestamp_column=event_timestamp_column, + created_timestamp_column=created_timestamp_column, + field_mapping=field_mapping, + date_partition_column=date_partition_column, ) @staticmethod diff --git a/sdk/python/feast/infra/offline_stores/snowflake_source.py b/sdk/python/feast/infra/offline_stores/snowflake_source.py index a972df191b..23c58751df 100644 --- a/sdk/python/feast/infra/offline_stores/snowflake_source.py +++ b/sdk/python/feast/infra/offline_stores/snowflake_source.py @@ -66,11 +66,11 @@ def __init__( ) super().__init__( - _name if _name else "", - event_timestamp_column, - created_timestamp_column, - field_mapping, - date_partition_column, + name=_name if _name else "", + event_timestamp_column=event_timestamp_column, + created_timestamp_column=created_timestamp_column, + field_mapping=field_mapping, + date_partition_column=date_partition_column, ) @staticmethod