diff --git a/docs/cli.md b/docs/cli.md index c243a8c4dbf46..9e7de977cd3f1 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -100,7 +100,8 @@ We use a plugin architecture so that you can install only the dependencies you a | [trino](./generated/ingestion/sources/trino.md) | `pip install 'acryl-datahub[trino]'` | Trino source | | [starburst-trino-usage](./generated/ingestion/sources/trino.md) | `pip install 'acryl-datahub[starburst-trino-usage]'` | Starburst Trino usage statistics source | | [nifi](./generated/ingestion/sources/nifi.md) | `pip install 'acryl-datahub[nifi]'` | Nifi source | -| [powerbi](./generated/ingestion/sources/powerbi.md) | `pip install 'acryl-datahub[powerbi]'` | Microsoft Power BI source | +| [powerbi](./generated/ingestion/sources/powerbi.md#module-powerbi) | `pip install 'acryl-datahub[powerbi]'` | Microsoft Power BI source | +| [powerbi-report-server](./generated/ingestion/sources/powerbi.md#module-powerbi-report-server) | `pip install 'acryl-datahub[powerbi-report-server]'` | Microsoft Power BI Report Server source | ### Sinks diff --git a/metadata-ingestion/docs/sources/powerbi/powerbi-report-server.md b/metadata-ingestion/docs/sources/powerbi/powerbi-report-server.md new file mode 100644 index 0000000000000..ca600f1078675 --- /dev/null +++ b/metadata-ingestion/docs/sources/powerbi/powerbi-report-server.md @@ -0,0 +1,13 @@ +## Configuration Notes +See the +1. [Microsoft Grant user access to a Report Server doc](https://docs.microsoft.com/en-us/sql/reporting-services/security/grant-user-access-to-a-report-server?view=sql-server-ver16) +2. 
Use the user credentials from the previous step in the recipe YAML file +## Concept mapping + +| Power BI Report Server | Datahub | +| ------------------------- | ------------------- | +| `Paginated Report` | `Dashboard` | +| `Power BI Report` | `Dashboard` | +| `Mobile Report` | `Dashboard` | +| `Linked Report` | `Dashboard` | +| `Dataset, Datasource` | `N/A` | diff --git a/metadata-ingestion/docs/sources/powerbi/powerbi-report-server_recipe.yml b/metadata-ingestion/docs/sources/powerbi/powerbi-report-server_recipe.yml new file mode 100644 index 0000000000000..cf58da2b6a2e5 --- /dev/null +++ b/metadata-ingestion/docs/sources/powerbi/powerbi-report-server_recipe.yml @@ -0,0 +1,29 @@ +source: + type: powerbi-report-server + config: + # Your Power BI Report Server Windows username + username: username + # Your Power BI Report Server Windows password + password: password + # Your Workstation name + workstation_name: workstation_name + # Your Power BI Report Server host URL, example: localhost:80 + host_port: host_port + # Your alias for Power BI Report Server host URL, example: local_powerbi_report_server + server_alias: server_alias + # Workspace's dataset environments, example: (PROD, DEV, QA, STAGE) + env: DEV + # GraphQL API URL, example: http://localhost:8080/api/graphql + graphql_url: http://localhost:8080/api/graphql + # Your Power BI Report Server base virtual directory name for reports + report_virtual_directory_name: Reports + # Your Power BI Report Server base virtual directory name for report server + report_server_virtual_directory_name: ReportServer + # Enable/Disable extracting ownership information of Dashboard + extract_ownership: True + # Set ownership type + ownership_type: TECHNICAL_OWNER + + +sink: + # sink configs \ No newline at end of file diff --git a/metadata-ingestion/examples/mce_files/data_platforms.json b/metadata-ingestion/examples/mce_files/data_platforms.json index 0de966f7345d1..f3270f323e595 100644 --- 
a/metadata-ingestion/examples/mce_files/data_platforms.json +++ b/metadata-ingestion/examples/mce_files/data_platforms.json @@ -656,6 +656,26 @@ }, "proposedDelta": null }, + { + "auditHeader": null, + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DataPlatformSnapshot": { + "urn": "urn:li:dataPlatform:powerbi-report-server", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataplatform.DataPlatformInfo": { + "datasetNameDelimiter": ".", + "name": "powerbi-report-server", + "displayName": "Power BI Report Server", + "type": "OTHERS", + "logoUrl": "/assets/platforms/powerbireportserverlogo.png" + } + } + ] + } + }, + "proposedDelta": null + }, { "auditHeader": null, "proposedSnapshot": { diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index cb4b8546ef500..134463c17730d 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -201,6 +201,8 @@ def get_long_description(): "deltalake", } +powerbi_report_server = {"requests", "requests_ntlm"} + usage_common = { "sqlparse", } @@ -324,6 +326,7 @@ def get_long_description(): "starburst-trino-usage": sql_common | usage_common | trino, "nifi": {"requests", "packaging"}, "powerbi": microsoft_common, + "powerbi-report-server": powerbi_report_server, "vertica": sql_common | {"sqlalchemy-vertica[vertica-python]==0.0.5"}, "unity-catalog": databricks_cli | {"requests"}, } @@ -414,6 +417,7 @@ def get_long_description(): "hive", "starburst-trino-usage", "powerbi", + "powerbi-report-server", "vertica", "salesforce", "unity-catalog" @@ -534,6 +538,7 @@ def get_long_description(): "starburst-trino-usage = datahub.ingestion.source.usage.starburst_trino_usage:TrinoUsageSource", "nifi = datahub.ingestion.source.nifi:NifiSource", "powerbi = datahub.ingestion.source.powerbi:PowerBiDashboardSource", + "powerbi-report-server = datahub.ingestion.source.powerbi_report_server:PowerBiReportServerDashboardSource", "iceberg = datahub.ingestion.source.iceberg.iceberg:IcebergSource", 
"vertica = datahub.ingestion.source.sql.vertica:VerticaSource", "presto-on-hive = datahub.ingestion.source.sql.presto_on_hive:PrestoOnHiveSource", diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/__init__.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/__init__.py new file mode 100644 index 0000000000000..45db1725ca1d1 --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/__init__.py @@ -0,0 +1,9 @@ +from datahub.ingestion.source.powerbi_report_server.constants import Constant +from datahub.ingestion.source.powerbi_report_server.report_server import ( + PowerBiReportServerDashboardSource, +) +from datahub.ingestion.source.powerbi_report_server.report_server_domain import ( + CorpUser, + PowerBiReport, + Report, +) diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/constants.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/constants.py new file mode 100644 index 0000000000000..92560a11b90eb --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/constants.py @@ -0,0 +1,98 @@ +from enum import Enum + + +class CreatedFrom(Enum): + REPORT = "Report" + DATASET = "Dataset" + VISUALIZATION = "Visualization" + UNKNOWN = "Unknown" + + +class RelationshipDirection(Enum): + INCOMING = "INCOMING" + OUTGOING = "OUTGOING" + + +class Constant: + """ + keys used in powerbi plugin + """ + + DATASET = "DATASET" + REPORTS = "REPORTS" + REPORT = "REPORT" + REPORT_DATASOURCES = "REPORT_DATASOURCES" + TYPE_REPORT = "Report" + DATASOURCE = "DATASOURCE" + DATASET_DATASOURCES = "DATASET_DATASOURCES" + DatasetId = "DatasetId" + ReportId = "ReportId" + PowerBiReportId = "ReportId" + Dataset_URN = "DatasetURN" + DASHBOARD_ID = "powerbi.linkedin.com/dashboards/{}" + DASHBOARD = "dashboard" + DATASETS = "DATASETS" + DATASET_ID = "powerbi.linkedin.com/datasets/{}" + DATASET_PROPERTIES = "datasetProperties" + 
# URL templates of the Power BI Report Server REST API, keyed by Constant name.
# Every template takes the API base URL through the ``PBIRS_BASE_URL``
# placeholder; item-level templates additionally take the id placeholder that
# appears inside the parentheses.
API_ENDPOINTS = {
    Constant.CATALOG_ITEM: "{PBIRS_BASE_URL}/CatalogItems({CATALOG_ID})",
    Constant.DATASETS: "{PBIRS_BASE_URL}/Datasets",
    Constant.DATASET: "{PBIRS_BASE_URL}/Datasets({DATASET_ID})",
    Constant.DATASET_DATASOURCES: "{PBIRS_BASE_URL}/Datasets({DATASET_ID})/DataSources",
    Constant.DATASOURCE: "{PBIRS_BASE_URL}/DataSources({DATASOURCE_ID})",
    Constant.EXCEL_WORKBOOK: "{PBIRS_BASE_URL}/ExcelWorkbooks({EXCEL_WORKBOOK_ID})",
    Constant.EXTENSIONS: "{PBIRS_BASE_URL}/Extensions",
    Constant.FAVORITE_ITEM: "{PBIRS_BASE_URL}/FavoriteItems({FAVORITE_ITEM_ID})",
    Constant.FOLDERS: "{PBIRS_BASE_URL}/Folders({FOLDER_ID})",
    Constant.KPIS: "{PBIRS_BASE_URL}/Kpis({KPI_ID})",
    Constant.LINKED_REPORTS: "{PBIRS_BASE_URL}/LinkedReports",
    Constant.LINKED_REPORT: "{PBIRS_BASE_URL}/LinkedReports({LINKED_REPORT_ID})",
    # Fixed placeholder: it was spelled ``PBIRS_BASE_URLL``, so formatting this
    # template with ``PBIRS_BASE_URL=...`` raised KeyError.
    Constant.ME: "{PBIRS_BASE_URL}/Me",
    Constant.MOBILE_REPORTS: "{PBIRS_BASE_URL}/MobileReports",
    Constant.MOBILE_REPORT: "{PBIRS_BASE_URL}/MobileReports({MOBILE_REPORT_ID})",
    Constant.POWERBI_REPORTS: "{PBIRS_BASE_URL}/PowerBiReports",
    Constant.POWERBI_REPORT: "{PBIRS_BASE_URL}/PowerBiReports({POWERBI_REPORT_ID})",
    Constant.POWERBI_REPORT_DATASOURCES: "{PBIRS_BASE_URL}/PowerBiReports({ID})/DataSources",
    Constant.REPORTS: "{PBIRS_BASE_URL}/Reports",
    Constant.REPORT: "{PBIRS_BASE_URL}/Reports({REPORT_ID})",
    Constant.REPORT_DATASOURCES: "{PBIRS_BASE_URL}/Reports({ID})/DataSources",
    Constant.RESOURCE: "{PBIRS_BASE_URL}/Resources({RESOURCE_GET})",
    Constant.SESSION: "{PBIRS_BASE_URL}/Session",
    Constant.SUBSCRIPTION: "{PBIRS_BASE_URL}/Subscriptions({SUBSCRIPTION_ID})",
    Constant.SYSTEM: "{PBIRS_BASE_URL}/System",
    Constant.SYSTEM_POLICIES: "{PBIRS_BASE_URL}/System/Policies",
}
datahub.ingestion.api.source import Source, SourceReport +from datahub.ingestion.api.workunit import MetadataWorkUnit +from datahub.ingestion.source.powerbi_report_server.constants import ( + API_ENDPOINTS, + Constant, +) +from datahub.ingestion.source.powerbi_report_server.report_server_domain import ( + CorpUser, + LinkedReport, + MobileReport, + Owner, + OwnershipData, + PowerBiReport, + Report, +) +from datahub.metadata.com.linkedin.pegasus2avro.common import ChangeAuditStamps +from datahub.metadata.schema_classes import ( + BrowsePathsClass, + ChangeTypeClass, + CorpUserInfoClass, + CorpUserKeyClass, + DashboardInfoClass, + DashboardKeyClass, + OwnerClass, + OwnershipClass, + OwnershipTypeClass, + StatusClass, +) +from datahub.utilities.dedup_list import deduplicate_list + +LOGGER = logging.getLogger(__name__) + + +class PowerBiReportServerAPIConfig(EnvBasedSourceConfigBase): + username: str = pydantic.Field(description="Windows account username") + password: str = pydantic.Field(description="Windows account password") + workstation_name: str = pydantic.Field( + default="localhost", description="Workstation name" + ) + host_port: str = pydantic.Field(description="Power BI Report Server host URL") + server_alias: str = pydantic.Field( + default="", description="Alias for Power BI Report Server host URL" + ) + graphql_url: str = pydantic.Field(description="GraphQL API URL") + report_virtual_directory_name: str = pydantic.Field( + description="Report Virtual Directory URL name" + ) + report_server_virtual_directory_name: str = pydantic.Field( + description="Report Server Virtual Directory URL name" + ) + extract_ownership: bool = pydantic.Field( + default=True, description="Whether ownership should be ingested" + ) + ownership_type: str = pydantic.Field( + default=OwnershipTypeClass.NONE, + description="Ownership type of owner", + ) + + @property + def get_base_api_http_url(self): + return "http://{}/{}/api/v2.0".format( + self.host_port, 
self.report_virtual_directory_name + ) + + @property + def get_base_api_https_url(self): + return "https://{}/{}/api/v2.0".format( + self.host_port, self.report_virtual_directory_name + ) + + @property + def get_base_url(self): + return "http://{}/{}/".format( + self.host_port, self.report_virtual_directory_name + ) + + @property + def host(self): + return self.server_alias or self.host_port.split(":")[0] + + +class PowerBiReportServerDashboardSourceConfig(PowerBiReportServerAPIConfig): + platform_name: str = "powerbi" + platform_urn: str = builder.make_data_platform_urn(platform=platform_name) + report_pattern: AllowDenyPattern = AllowDenyPattern.allow_all() + chart_pattern: AllowDenyPattern = AllowDenyPattern.allow_all() + + +class PowerBiReportServerAPI: + # API endpoints of PowerBI Report Server to fetch reports, datasets + + def __init__(self, config: PowerBiReportServerAPIConfig) -> None: + self.__config: PowerBiReportServerAPIConfig = config + self.__auth: HttpNtlmAuth = HttpNtlmAuth( + "{}\\{}".format(self.__config.workstation_name, self.__config.username), + self.__config.password, + ) + + @property + def get_auth_credentials(self): + return self.__auth + + def requests_get(self, url_http: str, url_https: str, content_type: str) -> Any: + try: + LOGGER.info("Request to Report URL={}".format(url_https)) + response = requests.get( + url=url_https, + auth=self.get_auth_credentials, + verify=False, + ) + except ConnectionError: + LOGGER.info("Request to Report URL={}".format(url_http)) + response = requests.get( + url=url_http, + auth=self.get_auth_credentials, + ) + # Check if we got response from PowerBi Report Server + if response.status_code != 200: + message: str = "Failed to fetch Report from powerbi-report-server for" + LOGGER.warning(message) + LOGGER.warning("{}={}".format(Constant.ReportId, content_type)) + raise ValueError(message) + + return response.json() + + def get_all_reports(self) -> List[Any]: + """ + Fetch all Reports from PowerBI Report 
class Mapper:
    """
    Transform PowerBI Report Server concept Report to DataHub concept Dashboard
    """

    class EquableMetadataWorkUnit(MetadataWorkUnit):
        """
        MetadataWorkUnit whose equality and hash are both derived from ``id``.

        Two work units with the same id compare equal and hash alike, so they
        can be placed in a set or deduplicated, which avoids passing the same
        MetadataWorkUnit to the DataHub ingestion framework twice.
        """

        def __eq__(self, instance):
            # Compare against the *other* object; the previous code compared
            # self.id with itself and was therefore always True.
            if not isinstance(instance, MetadataWorkUnit):
                return NotImplemented
            return self.id == instance.id

        def __hash__(self):
            # Hash the id's value, not its object identity, so that equal
            # work units hash alike as __eq__ requires.
            return hash(self.id)

    def __init__(self, config: PowerBiReportServerDashboardSourceConfig):
        self.__config = config

    @staticmethod
    def new_mcp(
        entity_type,
        entity_urn,
        aspect_name,
        aspect,
        change_type=ChangeTypeClass.UPSERT,
    ):
        """
        Create MCP
        """
        return MetadataChangeProposalWrapper(
            entityType=entity_type,
            changeType=change_type,
            entityUrn=entity_urn,
            aspectName=aspect_name,
            aspect=aspect,
        )

    def __to_work_unit(
        self, mcp: MetadataChangeProposalWrapper
    ) -> EquableMetadataWorkUnit:
        """
        Wrap an MCP in a work unit whose id is built from the platform name,
        the entity URN and the aspect name.
        """
        return Mapper.EquableMetadataWorkUnit(
            id="{PLATFORM}-{ENTITY_URN}-{ASPECT_NAME}".format(
                PLATFORM=self.__config.platform_name,
                ENTITY_URN=mcp.entityUrn,
                ASPECT_NAME=mcp.aspectName,
            ),
            mcp=mcp,
        )

    @staticmethod
    def to_urn_set(mcps: List[MetadataChangeProposalWrapper]) -> List[str]:
        """
        Return the deduplicated entity URNs of the given MCPs, skipping
        ``None`` entries and MCPs without an entity URN.
        """
        return deduplicate_list(
            [
                mcp.entityUrn
                for mcp in mcps
                if mcp is not None and mcp.entityUrn is not None
            ]
        )

    def to_ownership_set(
        self,
        mcps: List[MetadataChangeProposalWrapper],
        existing_owners: List[OwnerClass],
    ) -> List[Owner]:
        """
        Merge already-known owners with the users described by ``mcps``.

        Existing owners keep their own ownership type; every user MCP with an
        entity URN is added with the configured ``ownership_type``. The result
        is deduplicated.
        """
        ownership = [
            Owner(owner=owner.owner, type=owner.type) for owner in existing_owners
        ]
        ownership.extend(
            Owner(owner=mcp.entityUrn, type=self.__config.ownership_type)
            for mcp in mcps
            if mcp is not None and mcp.entityUrn is not None
        )
        return deduplicate_list(ownership)

    def __to_datahub_dashboard(
        self,
        report: Report,
        chart_mcps: List[MetadataChangeProposalWrapper],
        user_mcps: List[MetadataChangeProposalWrapper],
    ) -> List[MetadataChangeProposalWrapper]:
        """
        Map PowerBI Report Server report to Datahub Dashboard

        Returns the browse-path, dashboardInfo, status, dashboardKey and
        ownership MCPs of the dashboard, in that order.
        """
        dashboard_urn = builder.make_dashboard_urn(
            self.__config.platform_name, report.get_urn_part()
        )

        chart_urn_list: List[str] = self.to_urn_set(chart_mcps)
        owner_list: List[Owner] = self.to_ownership_set(
            mcps=user_mcps, existing_owners=report.user_info.existing_owners
        )

        if report.data_sources:
            data_source = str(
                [source.connection_string for source in report.data_sources]
            )
        else:
            data_source = ""

        custom_properties = {
            "workspaceName": "PowerBI Report Server",
            "workspaceId": self.__config.host_port,
            # created_by is optional on the report; fall back to "" like
            # modifiedBy so every property value is a string.
            "createdBy": report.created_by or "",
            "createdDate": str(report.created_date),
            "modifiedBy": report.modified_by or "",
            # Pick the date before stringifying: str(None) is the truthy
            # string "None", so the old `str(a) or str(b)` never fell back.
            "modifiedDate": str(report.modified_date or report.created_date),
            "dataSource": data_source,
        }

        # DashboardInfo mcp
        dashboard_info_cls = DashboardInfoClass(
            description=report.description or "",
            title=report.name or "",
            charts=chart_urn_list,
            lastModified=ChangeAuditStamps(),
            dashboardUrl=report.get_web_url(self.__config.get_base_url),
            customProperties=custom_properties,
        )
        info_mcp = self.new_mcp(
            entity_type=Constant.DASHBOARD,
            entity_urn=dashboard_urn,
            aspect_name=Constant.DASHBOARD_INFO,
            aspect=dashboard_info_cls,
        )

        # removed status mcp
        removed_status_mcp = self.new_mcp(
            entity_type=Constant.DASHBOARD,
            entity_urn=dashboard_urn,
            aspect_name=Constant.STATUS,
            aspect=StatusClass(removed=False),
        )

        # dashboardKey mcp
        dashboard_key_cls = DashboardKeyClass(
            dashboardTool=self.__config.platform_name,
            dashboardId=Constant.DASHBOARD_ID.format(report.id),
        )
        dashboard_key_mcp = self.new_mcp(
            entity_type=Constant.DASHBOARD,
            entity_urn=dashboard_urn,
            aspect_name=Constant.DASHBOARD_KEY,
            aspect=dashboard_key_cls,
        )

        # Dashboard ownership mcp
        owners = [OwnerClass(owner=user.owner, type=user.type) for user in owner_list]
        owner_mcp = self.new_mcp(
            entity_type=Constant.DASHBOARD,
            entity_urn=dashboard_urn,
            aspect_name=Constant.OWNERSHIP,
            aspect=OwnershipClass(owners=owners),
        )

        # Dashboard browsePaths mcp
        browse_path = BrowsePathsClass(
            paths=[
                report.get_browse_path(
                    "powerbi_report_server",
                    self.__config.host,
                    self.__config.env,
                    self.__config.report_virtual_directory_name,
                )
            ]
        )
        browse_path_mcp = self.new_mcp(
            entity_type=Constant.DASHBOARD,
            entity_urn=dashboard_urn,
            aspect_name=Constant.BROWSERPATH,
            aspect=browse_path,
        )

        return [
            browse_path_mcp,
            info_mcp,
            removed_status_mcp,
            dashboard_key_mcp,
            owner_mcp,
        ]

    def to_datahub_user(self, user: CorpUser) -> List[MetadataChangeProposalWrapper]:
        """
        Map PowerBI Report Server user to datahub user

        Returns the corpUserInfo, status and corpUserKey MCPs of the user, or
        an empty list when ``user`` is falsy.
        """
        user_mcps: List[MetadataChangeProposalWrapper] = []
        if not user:
            return user_mcps

        LOGGER.info("Converting user {} to datahub's user".format(user.username))

        # Create an URN for User
        user_urn = builder.make_user_urn(user.get_urn_part())

        user_info_instance = CorpUserInfoClass(
            displayName=user.properties.display_name,
            email=user.properties.email,
            title=user.properties.title,
            active=True,
        )
        user_mcps.append(
            self.new_mcp(
                entity_type=Constant.CORP_USER,
                entity_urn=user_urn,
                aspect_name=Constant.CORP_USER_INFO,
                aspect=user_info_instance,
            )
        )

        # removed status mcp
        user_mcps.append(
            self.new_mcp(
                entity_type=Constant.CORP_USER,
                entity_urn=user_urn,
                aspect_name=Constant.STATUS,
                aspect=StatusClass(removed=False),
            )
        )

        # corpUserKey mcp
        user_mcps.append(
            self.new_mcp(
                entity_type=Constant.CORP_USER,
                entity_urn=user_urn,
                aspect_name=Constant.CORP_USER_KEY,
                aspect=CorpUserKeyClass(username=user.username),
            )
        )

        return user_mcps

    def to_datahub_work_units(self, report: Report) -> List[EquableMetadataWorkUnit]:
        """
        Convert a report and its owner into a deduplicated list of work units.

        User MCPs are emitted before the dashboard MCPs. ``report.user_info``
        must already be populated, since its ``owner_to_add`` and
        ``existing_owners`` attributes are read here.
        """
        LOGGER.info("Converting Dashboard={} to DataHub Dashboard".format(report.name))

        # Convert user to CorpUser
        user_mcps: List[Any] = []
        owner_to_add = report.user_info.owner_to_add
        if owner_to_add:
            user_mcps = self.to_datahub_user(owner_to_add)

        # No dataset or chart MCPs are produced by this mapper; the empty
        # lists keep the emission order explicit.
        ds_mcps: List[Any] = []
        chart_mcps: List[Any] = []

        # Lets convert Dashboard to DataHub Dashboard
        dashboard_mcps = self.__to_datahub_dashboard(report, chart_mcps, user_mcps)

        # Now add MCPs in sequence
        mcps: List[Any] = [*ds_mcps, *user_mcps, *chart_mcps, *dashboard_mcps]

        # Convert MCPs to work units and drop duplicates (same work unit id)
        work_units = map(self.__to_work_unit, mcps)
        return deduplicate_list([wu for wu in work_units if wu is not None])
+ It extracts the following: + + Metadata that can be ingested: + - report name + - report description + - ownership(can add existing users in DataHub as owners) + - transfer folders structure to DataHub as it is in Report Server + - webUrl to report in Report Server + + Due to limits of PBIRS REST API, it's impossible to ingest next data for now: + - tiles info + - datasource of report + - dataset of report + + Next types of report can be ingested: + - PowerBI report(.pbix) + - Paginated report(.rdl) + - Mobile report + - Linked report + """ + + source_config: PowerBiReportServerDashboardSourceConfig + report: PowerBiReportServerDashboardSourceReport + accessed_dashboards: int = 0 + + def __init__( + self, config: PowerBiReportServerDashboardSourceConfig, ctx: PipelineContext + ): + super().__init__(ctx) + self.source_config = config + self.report = PowerBiReportServerDashboardSourceReport() + self.auth = PowerBiReportServerAPI(self.source_config).get_auth_credentials + self.powerbi_client = PowerBiReportServerAPI(self.source_config) + self.mapper = Mapper(config) + + @classmethod + def create(cls, config_dict, ctx): + config = PowerBiReportServerDashboardSourceConfig.parse_obj(config_dict) + return cls(config, ctx) + + def get_workunits(self) -> Iterable[MetadataWorkUnit]: + """ + Datahub Ingestion framework invoke this method + """ + LOGGER.info("PowerBI Report Server plugin execution is started") + + # Fetch PowerBI Report Server reports for given url + reports = self.powerbi_client.get_all_reports() + + for report in reports: + try: + report.user_info = self.get_user_info(report) + except pydantic.ValidationError as e: + message = "Error ({}) occurred while loading User {}(id={})".format( + e, report.name, report.id + ) + LOGGER.exception(message, e) + self.report.report_warning(report.id, message) + finally: + # Increase Dashboard and tiles count in report + self.report.report_scanned(count=1) + # Convert PowerBi Report Server Dashboard and child entities + # 
to Datahub work unit to ingest into Datahub + workunits = self.mapper.to_datahub_work_units(report) + for workunit in workunits: + # Add workunit to report + self.report.report_workunit(workunit) + # Return workunit to Datahub Ingestion framework + yield workunit + + def get_user_info(self, report: Any) -> OwnershipData: + existing_ownership: List[OwnerClass] = [] + if not self.source_config.extract_ownership: + return OwnershipData(existing_owners=[], owner_to_add=None) + dashboard_urn = builder.make_dashboard_urn( + self.source_config.platform_name, report.get_urn_part() + ) + user_urn = builder.make_user_urn(report.display_name) + + assert self.ctx.graph + ownership = self.ctx.graph.get_ownership(entity_urn=dashboard_urn) + if ownership: + existing_ownership = ownership.owners + if self.ctx.graph.get_aspect_v2( + entity_urn=user_urn, aspect="corpUserInfo", aspect_type=CorpUserInfoClass + ): + existing_ownership.append( + OwnerClass(owner=user_urn, type=self.source_config.ownership_type) + ) + return OwnershipData(existing_owners=existing_ownership) + user_data = dict( + urn=user_urn, + type=Constant.CORP_USER, + username=report.display_name, + properties=dict(active=True, displayName=report.display_name, email=""), + ) + owner_to_add = CorpUser(**user_data) + return OwnershipData( + existing_owners=existing_ownership, owner_to_add=owner_to_add + ) + + def get_report(self) -> SourceReport: + return self.report + + def close(self): + pass diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/report_server_domain.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/report_server_domain.py new file mode 100644 index 0000000000000..adcbcaaed96e6 --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/report_server_domain.py @@ -0,0 +1,368 @@ +from datetime import datetime +from typing import Any, Dict, List, Optional + +from pydantic import BaseModel, Field, validator + +from 
class CatalogItem(BaseModel):
    """
    Base model of a Power BI Report Server catalog item.

    Fields are populated from the PascalCase keys given as aliases.
    ``display_name`` is always recomputed from ``created_by`` by the validator
    below, whatever value was supplied for it.
    """

    id: str = Field(alias="Id")
    name: str = Field(alias="Name")
    description: Optional[str] = Field(alias="Description")
    path: str = Field(alias="Path")
    type: Any = Field(alias="Type")
    hidden: bool = Field(alias="Hidden")
    size: int = Field(alias="Size")
    modified_by: Optional[str] = Field(alias="ModifiedBy")
    modified_date: Optional[datetime] = Field(alias="ModifiedDate")
    created_by: Optional[str] = Field(alias="CreatedBy")
    created_date: Optional[datetime] = Field(alias="CreatedDate")
    parent_folder_id: Optional[str] = Field(alias="ParentFolderId")
    content_type: Optional[str] = Field(alias="ContentType")
    content: str = Field(alias="Content")
    is_favorite: bool = Field(alias="IsFavorite")
    user_info: Any = Field(alias="UserInfo")
    display_name: Optional[str] = Field(alias="DisplayName")
    has_data_sources: bool = Field(default=False, alias="HasDataSources")
    data_sources: Optional[List["DataSource"]] = Field(
        default_factory=list, alias="DataSources"
    )

    @validator("display_name", always=True)
    def validate_diplay_name(cls, value, values):  # noqa: N805
        """
        Derive the display name from ``created_by``.

        Returns the part of ``created_by`` after the last backslash, or ""
        when ``created_by`` is empty or unavailable.
        """
        # Fields that failed their own validation are left out of `values`,
        # so look the key up defensively instead of raising KeyError here.
        created_by = values.get("created_by")
        if created_by:
            return created_by.split("\\")[-1]
        return ""

    def get_urn_part(self):
        """Return the URN part of this item: ``reports.<id>``."""
        return "reports.{}".format(self.id)

    def get_web_url(self, base_reports_url: str) -> str:
        """Return ``base_reports_url`` followed by ``powerbi`` and the item path."""
        return "{}powerbi{}".format(base_reports_url, self.path)

    def get_browse_path(
        self, base_folder: str, workspace: str, env: str, report_directory: str
    ) -> str:
        """
        Return the browse path
        ``/<base_folder>/<env, lower-cased>/<workspace>/<report_directory><path>``.
        """
        return "/{}/{}/{}/{}{}".format(
            base_folder, env.lower(), workspace, report_directory, self.path
        )
__members(self): + return (self.id,) + + def __eq__(self, instance): + return ( + isinstance(instance, DataSet) and self.__members() == instance.__members() + ) + + def __hash__(self): + return hash(self.__members()) + + +class DataModelDataSource(BaseModel): + auth_type: Optional[str] = Field(alias="AuthType") + supported_auth_types: List[Optional[str]] = Field(alias="SupportedAuthTypes") + kind: str = Field(alias="Kind") + model_connection_name: str = Field(alias="ModelConnectionName") + secret: str = Field(alias="Secret") + type: Optional[str] = Field(alias="Type") + username: str = Field(alias="Username") + + +class CredentialsByUser(BaseModel): + display_text: str = Field(alias="DisplayText") + use_as_windows_credentials: bool = Field(alias="UseAsWindowsCredentials") + + +class CredentialsInServer(BaseModel): + username: str = Field(alias="UserName") + password: str = Field(alias="Password") + use_as_windows_credentials: bool = Field(alias="UseAsWindowsCredentials") + impersonate_authenticated_user: bool = Field(alias="ImpersonateAuthenticatedUser") + + +class ParameterValue(BaseModel): + name: str = Field(alias="Name") + value: str = Field(alias="Value") + is_value_field_reference: str = Field(alias="IsValueFieldReference") + + +class ExtensionSettings(BaseModel): + extension: str = Field(alias="Extension") + parameter_values: ParameterValue = Field(alias="ParameterValues") + + +class Subscription(BaseModel): + id: str = Field(alias="Id") + owner: str = Field(alias="Owner") + is_data_driven: bool = Field(alias="IsDataDriven") + description: str = Field(alias="Description") + report: str = Field(alias="Report") + is_active: bool = Field(alias="IsActive") + event_type: str = Field(alias="EventType") + schedule_description: str = Field(alias="ScheduleDescription") + last_run_time: datetime = Field(alias="LastRunTime") + last_status: str = Field(alias="LastStatus") + extension_settings: ExtensionSettings = Field(alias="ExtensionSettings") + delivery_extension: 
str = Field(alias="DeliveryExtension") + localized_delivery_extension_name: str = Field( + alias="LocalizedDeliveryExtensionName" + ) + modified_by: str = Field(alias="ModifiedBy") + modified_date: datetime = Field(alias="ModifiedDate") + parameter_values: ParameterValue = Field(alias="ParameterValues") + + +class MetaData(BaseModel): + is_relational: bool + + +class DataSource(CatalogItem): + name: str = Field(default="", alias="Name") + path: str = Field(default="", alias="Path") + is_enabled: bool = Field(alias="IsEnabled") + connection_string: str = Field(alias="ConnectionString") + data_model_data_source: Optional[DataModelDataSource] = Field( + alias="DataModelDataSource" + ) + data_source_sub_type: Optional[str] = Field(alias="DataSourceSubType") + data_source_type: Optional[str] = Field(alias="DataSourceType") + is_original_connection_string_expression_based: bool = Field( + alias="IsOriginalConnectionStringExpressionBased" + ) + is_connection_string_overridden: bool = Field(alias="IsConnectionStringOverridden") + credentials_by_user: Optional[CredentialsByUser] = Field(alias="CredentialsByUser") + credentials_in_server: Optional[CredentialsInServer] = Field( + alias="CredentialsInServer" + ) + is_reference: bool = Field(alias="IsReference") + subscriptions: Optional[Subscription] = Field(alias="Subscriptions") + meta_data: Optional[MetaData] = Field(alias="MetaData") + + def __members(self): + return (self.id,) + + def __eq__(self, instance): + return ( + isinstance(instance, DataSource) + and self.__members() == instance.__members() + ) + + def __hash__(self): + return hash(self.__members()) + + +class Comment(BaseModel): + id: str = Field(alias="Id") + item_id: str = Field(alias="ItemId") + username: str = Field(alias="UserName") + thread_id: str = Field(alias="ThreadId") + attachment_path: str = Field(alias="AttachmentPath") + text: str = Field(alias="Text") + created_date: datetime = Field(alias="CreatedDate") + modified_date: datetime = 
Field(alias="ModifiedDate") + + +class ExcelWorkbook(CatalogItem): + comments: Comment = Field(alias="Comments") + + +class Role(BaseModel): + name: str = Field(alias="Name") + description: str = Field(alias="Description") + + +class SystemPolicies(BaseModel): + group_user_name: str = Field(alias="GroupUserName") + roles: List[Role] = Field(alias="Roles") + + +class Report(CatalogItem): + has_data_sources: bool = Field(alias="HasDataSources") + has_shared_data_sets: bool = Field(alias="HasSharedDataSets") + has_parameters: bool = Field(alias="HasParameters") + + +class PowerBiReport(CatalogItem): + has_data_sources: bool = Field(alias="HasDataSources") + + +class Extension(BaseModel): + extension_type: str = Field(alias="ExtensionType") + name: str = Field(alias="Name") + localized_name: str = Field(alias="LocalizedName") + Visible: bool = Field(alias="Visible") + + +class Folder(CatalogItem): + """Folder""" + + +class DrillThroughTarget(BaseModel): + drill_through_target_type: str = Field(alias="DrillThroughTargetType") + + +class Value(BaseModel): + value: str = Field(alias="Value") + goal: int = Field(alias="Goal") + status: int = Field(alias="Status") + trend_set: List[int] = Field(alias="TrendSet") + + +class Kpi(CatalogItem): + value_format: str = Field(alias="ValueFormat") + visualization: str = Field(alias="Visualization") + drill_through_target: DrillThroughTarget = Field(alias="DrillThroughTarget") + currency: str = Field(alias="Currency") + values: Value = Field(alias="Values") + data: Dict[str, str] = Field(alias="Data") + + +class LinkedReport(CatalogItem): + has_parameters: bool = Field(alias="HasParameters") + link: str = Field(alias="Link") + + +class Manifest(BaseModel): + resources: List[Dict[str, List]] = Field(alias="Resources") + + +class MobileReport(CatalogItem): + allow_caching: bool = Field(alias="AllowCaching") + manifest: Manifest = Field(alias="Manifest") + + +class PowerBIReport(CatalogItem): + has_data_sources: bool = 
Field(alias="HasDataSources") + + +class Resources(CatalogItem): + """Resources""" + + +class System(BaseModel): + report_server_absolute_url: str = Field(alias="ReportServerAbsoluteUrl") + report_server_relative_url: str = Field(alias="ReportServerRelativeUrl") + web_portal_relative_url: str = Field(alias="WebPortalRelativeUrl") + product_name: str = Field(alias="ProductName") + product_version: str = Field(alias="ProductVersion") + product_type: str = Field(alias="ProductType") + time_zone: str = Field(alias="TimeZone") + + +class Owner(BaseModel): + owner: str + type: str + + def __members(self): + return self.owner, self.type + + def __eq__(self, instance): + return isinstance(instance, Owner) and self.__members() == instance.__members() + + def __hash__(self): + return hash(self.__members()) + + +class CorpUserEditableInfo(BaseModel): + display_name: str = Field(alias="displayName") + title: str + about_me: Optional[str] = Field(alias="aboutMe") + teams: Optional[List[str]] + skills: Optional[List[str]] + picture_link: Optional[str] = Field(alias="pictureLink") + + +class CorpUserEditableProperties(CorpUserEditableInfo): + slack: Optional[str] + phone: Optional[str] + email: str + + +class CorpUserStatus(BaseModel): + active: bool + + +class GlobalTags(BaseModel): + tags: List[str] + + +class EntityRelationship(BaseModel): + type: str + direction: RelationshipDirection + entity: str + created: datetime + + +class EntityRelationshipsResult(BaseModel): + start: int + count: int + total: int + relationships: Optional[EntityRelationship] + + +class CorpUserProperties(BaseModel): + active: bool + display_name: str = Field(alias="displayName") + email: str + title: Optional[str] + manager: Optional["CorpUser"] + department_id: Optional[int] = Field(alias="departmentId") + department_name: Optional[str] = Field(alias="departmentName") + first_name: Optional[str] = Field(alias="firstName") + last_name: Optional[str] = Field(alias="lastName") + full_name: Optional[str] 
= Field(alias="fullName") + country_code: Optional[str] = Field(alias="countryCode") + + +class CorpUser(BaseModel): + urn: str + type: str + username: str + properties: CorpUserProperties + editable_properties: Optional[CorpUserEditableProperties] = Field( + alias="editableProperties" + ) + status: Optional[CorpUserStatus] + tags: Optional[GlobalTags] + relationships: Optional[EntityRelationshipsResult] + editableInfo: Optional[CorpUserEditableInfo] = Field(alias="editableInfo") + global_tags: Optional[GlobalTags] = Field(alias="globalTags") + + def get_urn_part(self): + return "{}".format(self.username) + + def __members(self): + return (self.username,) + + def __eq__(self, instance): + return ( + isinstance(instance, CorpUser) and self.__members() == instance.__members() + ) + + def __hash__(self): + return hash(self.__members()) + + +class OwnershipData(BaseModel): + existing_owners: Optional[List[OwnerClass]] = [] + owner_to_add: Optional[CorpUser] + + class Config: + arbitrary_types_allowed = True + + +CatalogItem.update_forward_refs() +CorpUserProperties.update_forward_refs() diff --git a/metadata-ingestion/tests/integration/powerbi_report_server/golden_test_ingest.json b/metadata-ingestion/tests/integration/powerbi_report_server/golden_test_ingest.json new file mode 100644 index 0000000000000..9b202baa947d5 --- /dev/null +++ b/metadata-ingestion/tests/integration/powerbi_report_server/golden_test_ingest.json @@ -0,0 +1,450 @@ +[ +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:TEST_USER", + "changeType": "UPSERT", + "aspectName": "corpUserInfo", + "aspect": { + "value": "{\"active\": true, \"displayName\": \"TEST_USER\", \"email\": \"\"}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:TEST_USER", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": 
"{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:TEST_USER", + "changeType": "UPSERT", + "aspectName": "corpUserKey", + "aspect": { + "value": "{\"username\": \"TEST_USER\"}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938a)", + "changeType": "UPSERT", + "aspectName": "browsePaths", + "aspect": { + "value": "{\"paths\": [\"/powerbi_report_server/dev/server_alias/Reports/path/to/Testa\"]}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938a)", + "changeType": "UPSERT", + "aspectName": "dashboardInfo", + "aspect": { + "value": "{\"customProperties\": {\"workspaceName\": \"PowerBI Report Server\", \"workspaceId\": \"host_port\", \"createdBy\": \"TEST_USER\", \"createdDate\": \"2022-02-03 07:00:00\", \"modifiedBy\": \"TEST_USER\", \"modifiedDate\": \"2022-02-03 07:00:00\", \"dataSource\": \"\"}, \"title\": \"Testa\", \"description\": \"\", \"charts\": [], \"datasets\": [], \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"dashboardUrl\": \"http://host_port/Reports/powerbi/path/to/Testa\"}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938a)", + 
"changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938a)", + "changeType": "UPSERT", + "aspectName": "dashboardKey", + "aspect": { + "value": "{\"dashboardTool\": \"powerbi\", \"dashboardId\": \"powerbi.linkedin.com/dashboards/ee56dc21-248a-4138-a446-ee5ab1fc938a\"}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938a)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "value": "{\"owners\": [{\"owner\": \"urn:li:corpuser:TEST_USER\", \"type\": \"TECHNICAL_OWNER\"}, {\"owner\": \"urn:li:corpuser:TEST_USER\", \"type\": \"NONE\"}], \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:TEST_USER", + "changeType": "UPSERT", + "aspectName": "corpUserInfo", + "aspect": { + "value": "{\"active\": true, \"displayName\": \"TEST_USER\", \"email\": \"\"}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:TEST_USER", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + 
"entityType": "corpuser", + "entityUrn": "urn:li:corpuser:TEST_USER", + "changeType": "UPSERT", + "aspectName": "corpUserKey", + "aspect": { + "value": "{\"username\": \"TEST_USER\"}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938b)", + "changeType": "UPSERT", + "aspectName": "browsePaths", + "aspect": { + "value": "{\"paths\": [\"/powerbi_report_server/dev/server_alias/Reports/path/to/Testb\"]}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938b)", + "changeType": "UPSERT", + "aspectName": "dashboardInfo", + "aspect": { + "value": "{\"customProperties\": {\"workspaceName\": \"PowerBI Report Server\", \"workspaceId\": \"host_port\", \"createdBy\": \"TEST_USER\", \"createdDate\": \"2022-02-03 07:00:00\", \"modifiedBy\": \"TEST_USER\", \"modifiedDate\": \"2022-02-03 07:00:00\", \"dataSource\": \"\"}, \"title\": \"Testb\", \"description\": \"\", \"charts\": [], \"datasets\": [], \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"dashboardUrl\": \"http://host_port/Reports/powerbi/path/to/Testb\"}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938b)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 
1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938b)", + "changeType": "UPSERT", + "aspectName": "dashboardKey", + "aspect": { + "value": "{\"dashboardTool\": \"powerbi\", \"dashboardId\": \"powerbi.linkedin.com/dashboards/ee56dc21-248a-4138-a446-ee5ab1fc938b\"}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938b)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "value": "{\"owners\": [{\"owner\": \"urn:li:corpuser:TEST_USER\", \"type\": \"TECHNICAL_OWNER\"}, {\"owner\": \"urn:li:corpuser:TEST_USER\", \"type\": \"NONE\"}], \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:TEST_USER", + "changeType": "UPSERT", + "aspectName": "corpUserInfo", + "aspect": { + "value": "{\"active\": true, \"displayName\": \"TEST_USER\", \"email\": \"\"}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:TEST_USER", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:TEST_USER", + "changeType": "UPSERT", + "aspectName": "corpUserKey", + "aspect": { + "value": "{\"username\": \"TEST_USER\"}", + 
"contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938c)", + "changeType": "UPSERT", + "aspectName": "browsePaths", + "aspect": { + "value": "{\"paths\": [\"/powerbi_report_server/dev/server_alias/Reports/path/to/Testc\"]}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938c)", + "changeType": "UPSERT", + "aspectName": "dashboardInfo", + "aspect": { + "value": "{\"customProperties\": {\"workspaceName\": \"PowerBI Report Server\", \"workspaceId\": \"host_port\", \"createdBy\": \"TEST_USER\", \"createdDate\": \"2022-02-03 07:00:00\", \"modifiedBy\": \"TEST_USER\", \"modifiedDate\": \"2022-02-03 07:00:00\", \"dataSource\": \"\"}, \"title\": \"Testc\", \"description\": \"\", \"charts\": [], \"datasets\": [], \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"dashboardUrl\": \"http://host_port/Reports/powerbi/path/to/Testc\"}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938c)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938c)", + 
"changeType": "UPSERT", + "aspectName": "dashboardKey", + "aspect": { + "value": "{\"dashboardTool\": \"powerbi\", \"dashboardId\": \"powerbi.linkedin.com/dashboards/ee56dc21-248a-4138-a446-ee5ab1fc938c\"}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938c)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "value": "{\"owners\": [{\"owner\": \"urn:li:corpuser:TEST_USER\", \"type\": \"TECHNICAL_OWNER\"}, {\"owner\": \"urn:li:corpuser:TEST_USER\", \"type\": \"NONE\"}], \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:TEST_USER", + "changeType": "UPSERT", + "aspectName": "corpUserInfo", + "aspect": { + "value": "{\"active\": true, \"displayName\": \"TEST_USER\", \"email\": \"\"}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:TEST_USER", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:TEST_USER", + "changeType": "UPSERT", + "aspectName": "corpUserKey", + "aspect": { + "value": "{\"username\": \"TEST_USER\"}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "dashboard", + 
"entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938d)", + "changeType": "UPSERT", + "aspectName": "browsePaths", + "aspect": { + "value": "{\"paths\": [\"/powerbi_report_server/dev/server_alias/Reports/path/to/Testd\"]}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938d)", + "changeType": "UPSERT", + "aspectName": "dashboardInfo", + "aspect": { + "value": "{\"customProperties\": {\"workspaceName\": \"PowerBI Report Server\", \"workspaceId\": \"host_port\", \"createdBy\": \"TEST_USER\", \"createdDate\": \"2022-02-03 07:00:00\", \"modifiedBy\": \"TEST_USER\", \"modifiedDate\": \"2022-02-03 07:00:00\", \"dataSource\": \"\"}, \"title\": \"Testd\", \"description\": \"\", \"charts\": [], \"datasets\": [], \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"dashboardUrl\": \"http://host_port/Reports/powerbi/path/to/Testd\"}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938d)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938d)", + "changeType": "UPSERT", + "aspectName": "dashboardKey", + "aspect": { + "value": "{\"dashboardTool\": \"powerbi\", \"dashboardId\": 
\"powerbi.linkedin.com/dashboards/ee56dc21-248a-4138-a446-ee5ab1fc938d\"}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938d)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "value": "{\"owners\": [{\"owner\": \"urn:li:corpuser:TEST_USER\", \"type\": \"TECHNICAL_OWNER\"}, {\"owner\": \"urn:li:corpuser:TEST_USER\", \"type\": \"NONE\"}], \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/powerbi_report_server/test_powerbi_report_server.py b/metadata-ingestion/tests/integration/powerbi_report_server/test_powerbi_report_server.py new file mode 100644 index 0000000000000..2dedfb091392e --- /dev/null +++ b/metadata-ingestion/tests/integration/powerbi_report_server/test_powerbi_report_server.py @@ -0,0 +1,217 @@ +from datetime import datetime +from unittest import mock + +from freezegun import freeze_time + +from datahub.ingestion.run.pipeline import Pipeline +from datahub.metadata.schema_classes import AuditStampClass, OwnerClass, OwnershipClass +from tests.test_helpers import mce_helpers + +FROZEN_TIME = "2022-02-03 07:00:00" + + +def mock_existing_users(*args, **kwargs): + return OwnershipClass( + owners=[ + OwnerClass.from_obj( + { + "owner": "urn:li:corpuser:TEST_USER", + "type": "TECHNICAL_OWNER", + "source": None, + } + ) + ], + lastModified=AuditStampClass.from_obj( + {"time": 0, "actor": "urn:li:corpuser:unknown", "impersonator": None} + ), + ) + + +def mock_user_to_add(*args, **kwargs): + return None + + +def register_mock_api(request_mock): + api_vs_response = { + 
"https://host_port/Reports/api/v2.0/Reports": { + "method": "GET", + "status_code": 200, + "json": { + "value": [ + { + "Id": "ee56dc21-248a-4138-a446-ee5ab1fc938a", + "Name": "Testa", + "Description": None, + "Path": "/path/to/Testa", + "Type": "Report", + "Hidden": False, + "Size": 1010101, + "ModifiedBy": "TEST_USER", + "ModifiedDate": str(datetime.now()), + "CreatedBy": "TEST_USER", + "CreatedDate": str(datetime.now()), + "ParentFolderId": "47495172-89ab-455f-a446-fffd3cf239ca", + "IsFavorite": False, + "ContentType": None, + "Content": "", + "HasDataSources": True, + "Roles": [], + "HasSharedDataSets": True, + "HasParameters": True, + }, + ] + }, + }, + "https://host_port/Reports/api/v2.0/MobileReports": { + "method": "GET", + "status_code": 200, + "json": { + "value": [ + { + "Id": "ee56dc21-248a-4138-a446-ee5ab1fc938b", + "Name": "Testb", + "Description": None, + "Path": "/path/to/Testb", + "Type": "MobileReport", + "Hidden": False, + "Size": 1010101, + "ModifiedBy": "TEST_USER", + "ModifiedDate": str(datetime.now()), + "CreatedBy": "TEST_USER", + "CreatedDate": str(datetime.now()), + "ParentFolderId": "47495172-89ab-455f-a446-fffd3cf239cb", + "IsFavorite": False, + "ContentType": None, + "Content": "", + "HasDataSources": True, + "Roles": [], + "HasSharedDataSets": True, + "HasParameters": True, + "AllowCaching": True, + "Manifest": {"Resources": []}, + }, + ] + }, + }, + "https://host_port/Reports/api/v2.0/LinkedReports": { + "method": "GET", + "status_code": 200, + "json": { + "value": [ + { + "Id": "ee56dc21-248a-4138-a446-ee5ab1fc938c", + "Name": "Testc", + "Description": None, + "Path": "/path/to/Testc", + "Type": "LinkedReport", + "Hidden": False, + "Size": 1010101, + "ModifiedBy": "TEST_USER", + "ModifiedDate": str(datetime.now()), + "CreatedBy": "TEST_USER", + "CreatedDate": str(datetime.now()), + "ParentFolderId": "47495172-89ab-455f-a446-fffd3cf239cc", + "IsFavorite": False, + "ContentType": None, + "Content": "", + "HasDataSources": True, + 
"Roles": [], + "HasParameters": True, + "Link": "sjfgnk-7134-1234-abcd-ee5axvcv938b", + }, + ] + }, + }, + "https://host_port/Reports/api/v2.0/PowerBIReports": { + "method": "GET", + "status_code": 200, + "json": { + "value": [ + { + "Id": "ee56dc21-248a-4138-a446-ee5ab1fc938d", + "Name": "Testd", + "Description": None, + "Path": "/path/to/Testd", + "Type": "PowerBIReport", + "Hidden": False, + "Size": 1010101, + "ModifiedBy": "TEST_USER", + "ModifiedDate": str(datetime.now()), + "CreatedBy": "TEST_USER", + "CreatedDate": str(datetime.now()), + "ParentFolderId": "47495172-89ab-455f-a446-fffd3cf239cd", + "IsFavorite": False, + "ContentType": None, + "Content": "", + "HasDataSources": True, + "Roles": [], + }, + ] + }, + }, + } + + for url in api_vs_response.keys(): + request_mock.register_uri( + api_vs_response[url]["method"], + url, + json=api_vs_response[url]["json"], + status_code=api_vs_response[url]["status_code"], + ) + + +def default_source_config(): + return { + "username": "foo", + "password": "bar", + "workstation_name": "workstation", + "host_port": "host_port", + "server_alias": "server_alias", + "graphql_url": "http://localhost:8080/api/graphql", + "report_virtual_directory_name": "Reports", + "report_server_virtual_directory_name": "ReportServer", + "env": "DEV", + } + + +@freeze_time(FROZEN_TIME) +@mock.patch("requests_ntlm.HttpNtlmAuth") +def test_powerbi_ingest(mock_msal, pytestconfig, tmp_path, mock_time, requests_mock): + test_resources_dir = ( + pytestconfig.rootpath / "tests/integration/powerbi_report_server" + ) + + register_mock_api(request_mock=requests_mock) + + pipeline = Pipeline.create( + { + "run_id": "powerbi-report-server-test", + "source": { + "type": "powerbi-report-server", + "config": { + **default_source_config(), + }, + }, + "sink": { + "type": "file", + "config": { + "filename": f"{tmp_path}/powerbi_report_server_mces.json", + }, + }, + } + ) + pipeline.ctx.graph = mock.MagicMock() + pipeline.ctx.graph.get_ownership = 
mock.MagicMock() + pipeline.ctx.graph.get_ownership.side_effect = mock_existing_users + pipeline.ctx.graph.get_aspect_v2 = mock.MagicMock() + pipeline.ctx.graph.get_aspect_v2.side_effect = mock_user_to_add + + pipeline.run() + pipeline.raise_from_status() + mce_out_file = "golden_test_ingest.json" + + mce_helpers.check_golden_file( + pytestconfig, + output_path=tmp_path / "powerbi_report_server_mces.json", + golden_path=f"{test_resources_dir}/{mce_out_file}", + ) diff --git a/metadata-service/war/src/main/resources/boot/data_platforms.json b/metadata-service/war/src/main/resources/boot/data_platforms.json index f0a9dd37f0993..42720d1de0cb2 100644 --- a/metadata-service/war/src/main/resources/boot/data_platforms.json +++ b/metadata-service/war/src/main/resources/boot/data_platforms.json @@ -525,4 +525,4 @@ "logoUrl": "/assets/platforms/databrickslogo.png" } } -] \ No newline at end of file +]