Skip to content

Commit

Permalink
Merge remote-tracking branch 'acryl/jj--add-structured-logging-to-ing…
Browse files Browse the repository at this point in the history
…estion' into jj--add-structured-logging-to-ingestion
  • Loading branch information
John Joyce authored and John Joyce committed Jul 2, 2024
2 parents 47445c8 + da0739e commit 023ad85
Show file tree
Hide file tree
Showing 13 changed files with 114 additions and 299 deletions.
162 changes: 3 additions & 159 deletions datahub-web-react/src/app/ingest/source/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,71 +16,16 @@ export enum StructuredReportItemLevel {
INFO,
}

/**
* The set of standard, well-supported warning and error types that the frontend recognizes.
*
* NOTE: this is a numeric enum, so members are numbered from 0 in declaration order.
* SCAN_UNAUTHORIZED is therefore 0 (falsy) - compare members with `===` rather than
* relying on truthiness.
*/
export enum StructuredReportItemType {
/**
* Unauthorized to scan a specific part of the source (e.g. a database, schema, or project) or a specific asset.
*/
SCAN_UNAUTHORIZED,
/**
* Unauthorized to access lineage information.
*/
LINEAGE_UNAUTHORIZED,
/**
* Unauthorized to access usage information (recent queries).
*/
USAGE_UNAUTHORIZED,
/**
* Unauthorized to profile some tables.
*/
PROFILING_UNAUTHORIZED,
/**
* Failure to parse some queries to extract column-level or asset-level lineage.
*/
LINEAGE_QUERY_PARSING_FAILED,
/**
* Failure to parse some queries to extract usage information.
*/
USAGE_QUERY_PARSING_FAILED,
/**
* Failure to connect to the data source due to malformed connection details.
*/
CONNECTION_FAILED_COORDINATES,
/**
* Failure to connect to the data source due to bad credentials.
*/
CONNECTION_FAILED_CREDENTIALS,
/**
* Failure to connect to the data source because a 3rd party service is unavailable.
*/
CONNECTION_FAILED_SERVICE_UNAVAILABLE,
/**
* Failure to connect to the data source due to a client-side timeout.
*/
CONNECTION_FAILED_SERVICE_TIMEOUT,
/**
* Failure to connect to the data source for an unknown reason.
*/
CONNECTION_FAILED_UNKNOWN,
/**
* Fallback type for unrecognized structured report lines.
*/
UNKNOWN,
}

/**
* A type describing an individual warning / failure item in a structured report.
*
* TODO: Determine whether we need a message field to be reported!
*/
export interface StructuredReportItem {
export interface StructuredReportLogEntry {
level: StructuredReportItemLevel; // The "log level"
title: string; // The "well-supported" or standardized title
title?: string; // The "well-supported" or standardized title
message: string; // The message to display associated with the error.
context: string[]; // The context of WHERE the issue was encountered, as a string.
rawType: string; // The "raw type" string received from the ingestion backend.
}

/**
Expand All @@ -90,106 +35,5 @@ export interface StructuredReport {
infoCount: number;
errorCount: number;
warnCount: number;
items: StructuredReportItem[];
items: StructuredReportLogEntry[];
}

/**
* Display details for each frontend standardized report item type.
*
* Each entry carries:
* - type: the standardized StructuredReportItemType the entry describes.
* - title: a short, human-readable headline for the item.
* - message: a longer explanation of the item (absent on the UNKNOWN entry).
* - rawTypes: the raw backend type strings that map to this entry (absent on the UNKNOWN entry).
*
* NOTE: every rawTypes list below is currently empty, so no raw backend type string is
* associated with an entry through this table.
*/
export const STRUCTURED_REPORT_ITEM_DISPLAY_DETAILS = [
{
// Fallback entry: only a title is provided, with no message and no rawTypes.
type: StructuredReportItemType.UNKNOWN,
title: 'An unexpected issue occurred',
},
{
type: StructuredReportItemType.SCAN_UNAUTHORIZED,
title: 'Unauthorized to scan some assets',
message: 'The provided credential details were unauthorized to scan some assets in the data source.',
rawTypes: [],
},
{
type: StructuredReportItemType.LINEAGE_UNAUTHORIZED,
title: 'Unauthorized to extract some lineage',
message:
'The provided credential details were unauthorized to extract some asset lineage from the data source.',
rawTypes: [],
},
{
type: StructuredReportItemType.USAGE_UNAUTHORIZED,
title: 'Unauthorized to extract some usage',
message:
'The provided credential details were unauthorized to extract some asset usage information from the data source.',
rawTypes: [],
},
{
type: StructuredReportItemType.PROFILING_UNAUTHORIZED,
title: 'Unauthorized to extract some data statistics',
message:
'The provided credential details were unauthorized to extract some asset profiles or statistics from the data source.',
rawTypes: [],
},
{
type: StructuredReportItemType.LINEAGE_QUERY_PARSING_FAILED,
title: 'Failed to extract some lineage',
message: 'Failed to extract lineage for some assets due to failed query parsing.',
rawTypes: [],
},
{
type: StructuredReportItemType.USAGE_QUERY_PARSING_FAILED,
title: 'Failed to extract some usage',
message: 'Failed to extract usage or popularity for some assets due to failed query parsing.',
rawTypes: [],
},
{
type: StructuredReportItemType.CONNECTION_FAILED_COORDINATES,
title: 'Failed to connect using provided details',
message:
'Failed to connect to data source. Unable to establish a connection to the specified service. Please check the connection details.',
rawTypes: [],
},
{
type: StructuredReportItemType.CONNECTION_FAILED_CREDENTIALS,
title: 'Failed to connect using provided credentials',
message:
'Failed to connect to data source. Unable to authenticate with the specified service using the provided credentials. Please check the connection credentials.',
rawTypes: [],
},
{
type: StructuredReportItemType.CONNECTION_FAILED_SERVICE_UNAVAILABLE,
title: 'Service unavailable',
message: 'Failed to connect to the data source. The service is currently unavailable.',
rawTypes: [],
},
{
type: StructuredReportItemType.CONNECTION_FAILED_SERVICE_TIMEOUT,
title: 'Service timeout',
message:
'Failed to connect to the data source. A timeout was encountered when attempting to extract data from the data source.',
rawTypes: [],
},
{
type: StructuredReportItemType.CONNECTION_FAILED_UNKNOWN,
title: 'Unknown connection error',
message: 'Failed to connect to the data source for an unknown reason. Please check the connection details.',
rawTypes: [],
},
];

/**
 * Lookup from a raw backend type string to the display-details entry that lists it.
 *
 * Built once at module load by walking STRUCTURED_REPORT_ITEM_DISPLAY_DETAILS and
 * registering every string in an entry's `rawTypes` (entries without `rawTypes`
 * contribute nothing).
 */
export const STRUCTURED_REPORT_ITEM_RAW_TYPE_TO_DETAILS = new Map();
for (const entry of STRUCTURED_REPORT_ITEM_DISPLAY_DETAILS) {
    for (const raw of entry.rawTypes || []) {
        STRUCTURED_REPORT_ITEM_RAW_TYPE_TO_DETAILS.set(raw, entry);
    }
}

/**
 * Lookup from a standardized item type to its display-details entry.
 *
 * Built once at module load from STRUCTURED_REPORT_ITEM_DISPLAY_DETAILS, keyed by each
 * entry's `type`.
 */
export const STRUCTURED_REPORT_ITEM_TYPE_TO_DETAILS = new Map();
for (const entry of STRUCTURED_REPORT_ITEM_DISPLAY_DETAILS) {
    STRUCTURED_REPORT_ITEM_TYPE_TO_DETAILS.set(entry.type, entry);
}
98 changes: 34 additions & 64 deletions datahub-web-react/src/app/ingest/source/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,7 @@ import EntityRegistry from '../../entity/EntityRegistry';
import { ANTD_GRAY, REDESIGN_COLORS } from '../../entity/shared/constants';
import { capitalizeFirstLetterOnly, pluralize } from '../../shared/textUtil';
import { SourceConfig } from './builder/types';
import {
STRUCTURED_REPORT_ITEM_RAW_TYPE_TO_DETAILS,
STRUCTURED_REPORT_ITEM_TYPE_TO_DETAILS,
StructuredReport,
StructuredReportItem,
StructuredReportItemLevel,
StructuredReportItemType,
} from './types';
import { StructuredReport, StructuredReportLogEntry, StructuredReportItemLevel } from './types';

export const getSourceConfigs = (ingestionSources: SourceConfig[], sourceType: string) => {
const sourceConfigs = ingestionSources.find((source) => source.name === sourceType);
Expand Down Expand Up @@ -135,7 +128,9 @@ export const getExecutionRequestStatusDisplayColor = (status: string) => {
export const validateURL = (fieldName: string) => {
return {
validator(_, value) {
const URLPattern = new RegExp(/^(?:http(s)?:\/\/)?[\w.-]+(?:\.[a-zA-Z0-9.-]{2,})+[\w\-._~:/?#[\]@!$&'()*+,;=.]+$/);
const URLPattern = new RegExp(
/^(?:http(s)?:\/\/)?[\w.-]+(?:\.[a-zA-Z0-9.-]{2,})+[\w\-._~:/?#[\]@!$&'()*+,;=.]+$/,
);
const isURLValid = URLPattern.test(value);
if (!value || isURLValid) {
return Promise.resolve();
Expand All @@ -145,36 +140,12 @@ export const validateURL = (fieldName: string) => {
};
};

/**
 * Resolves a raw backend type string to a StructuredReportItemType by matching it,
 * case-insensitively, against the enum's member names.
 *
 * @param rawType the raw type string received from the ingestion backend.
 * @returns the matching enum member, or StructuredReportItemType.UNKNOWN when the
 * string does not name a member.
 */
const tryMapRawTypeToStructuredTypeByName = (rawType: string): StructuredReportItemType => {
    // Enum member names are ASCII, so use the locale-independent upper-casing; the
    // locale-aware variant can produce non-ASCII letters (e.g. a dotted capital I)
    // that never match a member name.
    const normalizedType = rawType.toUpperCase();
    const mapped = StructuredReportItemType[normalizedType as keyof typeof StructuredReportItemType];
    // Check the result's type rather than its truthiness:
    //  - the first enum member has the value 0, which `||` would discard as "missing";
    //  - numeric enums also expose reverse (number -> name) entries, so a raw type such
    //    as "0" would otherwise yield a member *name* string instead of a member.
    return typeof mapped === 'number' ? mapped : StructuredReportItemType.UNKNOWN;
};

/**
 * Determines the standardized item type for a raw backend type string.
 *
 * An explicit entry in STRUCTURED_REPORT_ITEM_RAW_TYPE_TO_DETAILS takes precedence;
 * otherwise the raw string is matched against the enum by name.
 */
const getStructuredReportItemType = (rawType: string): StructuredReportItemType => {
    if (STRUCTURED_REPORT_ITEM_RAW_TYPE_TO_DETAILS.has(rawType)) {
        return STRUCTURED_REPORT_ITEM_RAW_TYPE_TO_DETAILS.get(rawType).type;
    }
    return tryMapRawTypeToStructuredTypeByName(rawType);
};

/**
 * Looks up the standardized display title for a raw backend type string.
 *
 * Yields undefined when no details are registered for the resolved type.
 */
const getStructuredReportItemTitle = (rawType: string): string => {
    const details = STRUCTURED_REPORT_ITEM_TYPE_TO_DETAILS.get(getStructuredReportItemType(rawType));
    return details?.title;
};

/**
 * Converts a raw backend log level string (e.g. "warn") into a StructuredReportItemLevel
 * by matching it, case-insensitively, against the enum's member names.
 *
 * @param rawLevel the raw level string received from the ingestion backend.
 * @returns the matching enum member, or undefined when the string does not name one.
 */
const getStructuredReportItemLevel = (rawLevel: string) => {
    // Enum member names are ASCII, so use the locale-independent upper-casing; the
    // locale-aware variant can turn "info" into a string that matches no member.
    const normalizedLevel = rawLevel.toUpperCase();
    // Index with the level enum's own key type (not the item *type* enum's keys).
    return StructuredReportItemLevel[normalizedLevel as keyof typeof StructuredReportItemLevel];
};

/**
 * Produces the message to display for a raw backend type string.
 *
 * @param rawType the raw type string received from the ingestion backend.
 * @returns the standardized message registered for the resolved type, or the raw type
 * string itself when the type is unrecognized or has no registered message.
 */
const getStructuredReportItemMessage = (rawType: string): string => {
    const stdType = getStructuredReportItemType(rawType);
    // Compare the *resolved* type against UNKNOWN. Testing the enum constant on its own
    // is always truthy (UNKNOWN is a non-zero member), which made every call return the
    // raw type and ignore the standardized message.
    if (stdType === StructuredReportItemType.UNKNOWN) {
        return rawType;
    }
    // Fall back to the raw type so the declared `string` return is always honored.
    return STRUCTURED_REPORT_ITEM_TYPE_TO_DETAILS.get(stdType)?.message ?? rawType;
};

const createStructuredReport = (items: StructuredReportItem[]): StructuredReport => {
const createStructuredReport = (items: StructuredReportLogEntry[]): StructuredReport => {
const errorCount = items.filter((item) => item.level === StructuredReportItemLevel.ERROR).length;
const warnCount = items.filter((item) => item.level === StructuredReportItemLevel.WARN).length;
const infoCount = items.filter((item) => item.level === StructuredReportItemLevel.INFO).length;
Expand All @@ -191,61 +162,60 @@ const transformToStructuredReport = (structuredReportObj: any): StructuredReport
return null;
}

/* Legacy help function to map backend failure or warning ingestion objects into StructuredReportItems */
/* Legacy helper function to map backend failure or warning ingestion objects into StructuredReportLogEntry[] */
const mapItemObject = (
items: { [key: string]: string[] },
level: StructuredReportItemLevel,
): StructuredReportItem[] => {
return Object.entries(items).map(([rawType, context]) => ({
): StructuredReportLogEntry[] => {
return Object.entries(items).map(([rawMessage, context]) => ({
level,
title: getStructuredReportItemTitle(rawType),
message: getStructuredReportItemMessage(rawType),
title: 'An unexpected issue occurred',
message: rawMessage,
context,
rawType,
}));
};

/* V2 help function to map backend failure or warning lists into StructuredReportItems */
const mapItemArray = (items): StructuredReportItem[] => {
/* V2 helper function to map backend failure or warning lists into StructuredReportLogEntry[] */
const mapItemArray = (items): StructuredReportLogEntry[] => {
return items.map((item) => ({
level: getStructuredReportItemLevel(item.level),
title: getStructuredReportItemTitle(item.type),
message: !item.message ? getStructuredReportItemMessage(item.type) : item.message,
title: item.title || 'An unexpected issue occurred',
message: item.message,
context: item.context,
rawType: item.type,
}));
};

const sourceReport = structuredReportObj.source?.report;
try {
const sourceReport = structuredReportObj.source?.report;

if (!sourceReport) {
return null;
}
if (!sourceReport) {
return null;
}

// extract the report.
let structuredReport: StructuredReport;

if (sourceReport.structured_logs) {
// If the report has NEW structured logs fields, use that field.
structuredReport = createStructuredReport(mapItemArray(sourceReport.structured_logs || []));
} else {
// Else fallback to using the legacy fields
const failures = sourceReport.failure_list
const failures = Array.isArray(sourceReport.failures)
? /* Use V2 failureList if present */
mapItemArray(sourceReport.failure_list || [])
mapItemArray(sourceReport.failures || [])
: /* Else use the legacy object type */
mapItemObject(sourceReport.failures || {}, StructuredReportItemLevel.ERROR);

const warnings = sourceReport.warning_list
const warnings = Array.isArray(sourceReport.warnings)
? /* Use V2 warning if present */
mapItemArray(sourceReport.warning_list || [])
mapItemArray(sourceReport.warnings || [])
: /* Else use the legacy object type */
mapItemObject(sourceReport.warnings || {}, StructuredReportItemLevel.WARN);

structuredReport = createStructuredReport([...failures, ...warnings]);
}
const infos = Array.isArray(sourceReport.infos)
? /* Use V2 infos if present */
mapItemArray(sourceReport.infos || [])
: /* Else use the legacy object type */
mapItemObject(sourceReport.infos || {}, StructuredReportItemLevel.INFO);

return structuredReport;
return createStructuredReport([...failures, ...warnings, ...infos]);
} catch (e) {
console.warn('Failed to extract structured report from ingestion report!', e);
return null;
}
};

export const getStructuredReport = (result: Partial<ExecutionRequestResult>): StructuredReport | null => {
Expand Down
2 changes: 1 addition & 1 deletion metadata-ingestion/src/datahub/ingestion/api/source.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ def report_failure(
context_list.append(context)
self._errors[log_key] = StructuredLog(
level=StructuredLogLevel.ERROR,
type=title,
title=title,
message=message,
context=context_list,
stacktrace=stacktrace,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -466,7 +466,7 @@ def construct_schema_metadata(
if schema_size > MAX_SCHEMA_SIZE:
# downsample the schema, using frequency as the sort key
self.report.report_warning(
type="Schema Size Too Large",
title="Schema Size Too Large",
message=f"Downsampling the table schema because MAX_SCHEMA_SIZE threshold is {MAX_SCHEMA_SIZE}",
context=f"Collection: {dataset_urn}",
)
Expand Down Expand Up @@ -536,7 +536,7 @@ def get_native_type(self, attribute_type: Union[type, str], table_name: str) ->
)
if type_string is None:
self.report.report_warning(
type="Unable to Map Attribute Type",
title="Unable to Map Attribute Type",
message=f"Unable to map type {attribute_type} to native data type",
context=f"Collection: {table_name}",
)
Expand All @@ -553,7 +553,7 @@ def get_field_type(

if type_class is None:
self.report.report_warning(
type="Unable to Map Field Type",
title="Unable to Map Field Type",
message=f"Unable to map type {attribute_type} to metadata schema field type",
context=f"Collection: {table_name}",
)
Expand Down
Loading

0 comments on commit 023ad85

Please sign in to comment.