diff --git a/metadata-ingestion/docs/sources/demo-data/demo-data_recipe.yml b/metadata-ingestion/docs/sources/demo-data/demo-data_recipe.yml new file mode 100644 index 0000000000000..ebd6691126c84 --- /dev/null +++ b/metadata-ingestion/docs/sources/demo-data/demo-data_recipe.yml @@ -0,0 +1,3 @@ +source: + type: demo-data + config: {} diff --git a/metadata-ingestion/docs/sources/dummy-data/dummy-data_recipe.yml b/metadata-ingestion/docs/sources/dummy-data/dummy-data_recipe.yml deleted file mode 100644 index 1963847cee66e..0000000000000 --- a/metadata-ingestion/docs/sources/dummy-data/dummy-data_recipe.yml +++ /dev/null @@ -1,6 +0,0 @@ -source: - type: dummy-data - config: - # This source is a thin wrapper over file source for testing purposes - # This path is set in code so set to empty here. - path: "" \ No newline at end of file diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 51a133da4ecb1..17d192f203319 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -553,7 +553,7 @@ def get_long_description(): "presto-on-hive = datahub.ingestion.source.sql.presto_on_hive:PrestoOnHiveSource", "pulsar = datahub.ingestion.source.pulsar:PulsarSource", "salesforce = datahub.ingestion.source.salesforce:SalesforceSource", - "dummy-data = datahub.ingestion.source.dummy_source.DummySource", + "demo-data = datahub.ingestion.source.demo_data.DemoDataSource", "unity-catalog = datahub.ingestion.source.unity.source:UnityCatalogSource", ], "datahub.ingestion.sink.plugins": [ diff --git a/metadata-ingestion/src/datahub/ingestion/source/demo_data.py b/metadata-ingestion/src/datahub/ingestion/source/demo_data.py new file mode 100644 index 0000000000000..1764596bb5e8d --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/demo_data.py @@ -0,0 +1,33 @@ +from datahub.configuration.common import ConfigModel +from datahub.ingestion.api.common import PipelineContext +from datahub.ingestion.api.decorators import ( + SupportStatus, + 
config_class, + platform_name, + support_status, +) +from datahub.ingestion.source.file import FileSourceConfig, GenericFileSource +from datahub.utilities.sample_data import download_sample_data + + +class DemoDataConfig(ConfigModel): + # The demo data source does not accept any configuration. + pass + + +@platform_name("Demo Data") +@config_class(DemoDataConfig) +@support_status(SupportStatus.UNKNOWN) +class DemoDataSource(GenericFileSource): + """ + This source loads sample data into DataHub. It is intended for demo and testing purposes only. + """ + + def __init__(self, ctx: PipelineContext, config: DemoDataConfig): + file_config = FileSourceConfig(filename=download_sample_data()) + super().__init__(ctx, file_config) + + @classmethod + def create(cls, config_dict, ctx): + config = DemoDataConfig.parse_obj(config_dict or {}) + return cls(ctx, config) diff --git a/metadata-ingestion/src/datahub/ingestion/source/dummy_source.py b/metadata-ingestion/src/datahub/ingestion/source/dummy_source.py deleted file mode 100644 index 928831dbf51f7..0000000000000 --- a/metadata-ingestion/src/datahub/ingestion/source/dummy_source.py +++ /dev/null @@ -1,29 +0,0 @@ -import pathlib - -from datahub.configuration.common import ConfigModel -from datahub.ingestion.api.common import PipelineContext -from datahub.ingestion.api.decorators import ( - SupportStatus, - config_class, - platform_name, - support_status, -) -from datahub.ingestion.source.file import FileSourceConfig, GenericFileSource -from datahub.utilities.sample_data import download_sample_data - - -class DummySourceConfig(FileSourceConfig): - path = pathlib.Path("") - - -@platform_name("DummySource") -@config_class(ConfigModel) -@support_status(SupportStatus.UNKNOWN) -class DummySource(GenericFileSource): - """ - This is a dummy plugin only for testing and fast demonstration of managed ingestion. 
- """ - - def __init__(self, ctx: PipelineContext, config: DummySourceConfig): - config.path = pathlib.Path(download_sample_data()) - super().__init__(ctx, config)