[8.x] [Auto Import] CSV format support (#194386) (#196090)
# Backport

This will backport the following commits from `main` to `8.x`:
- [[Auto Import] CSV format support (#194386)](#194386)

<!--- Backport version: 9.4.3 -->

### Questions?
Please refer to the [Backport tool
documentation](https://github.com/sqren/backport)

<!--BACKPORT [{"author":{"name":"Ilya
Nikokoshev","email":"[email protected]"},"sourceCommit":{"committedDate":"2024-10-14T10:24:58Z","message":"[Auto
Import] CSV format support (#194386)\n\n## Release
Notes\r\n\r\nAutomatic Import can now create integrations for logs in
the CSV format.\r\nOwing to the maturity of log format support, we thus
remove the verbiage\r\nabout requiring the JSON/NDJSON format.\r\n\r\n##
Summary\r\n\r\n**Added: the CSV feature**\r\n\r\nThe issue is
#194342 \r\n\r\nWhen the user
adds a log sample whose format is recognized as CSV by the\r\nLLM, we
now parse the samples and insert
the\r\n[csv](https://www.elastic.co/guide/en/elasticsearch/reference/current/csv-processor.html)\r\nprocessor
into the generated pipeline.\r\n\r\nIf the header is present, we use it
for the field names and add
a\r\n[drop](https://www.elastic.co/guide/en/elasticsearch/reference/current/drop-processor.html)\r\nprocessor
that removes a header from the document stream by comparing\r\nthe
values to the header values.\r\n\r\nIf the header is missing, we ask the
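
For illustration only, a minimal sketch of the two processors this step adds to the generated pipeline, assuming a hypothetical sample whose header row is `ts,src_ip,msg` (the actual generated pipeline will differ in detail):

```yaml
processors:
  - csv:
      # Parse the raw line into the fields named by the header
      # (or proposed by the LLM when the header is missing).
      field: message
      target_fields:
        - ts
        - src_ip
        - msg
  - drop:
      # Drop the header row itself: when it is parsed, every target
      # field holds its own column name.
      if: ctx.ts == 'ts' && ctx.src_ip == 'src_ip' && ctx.msg == 'msg'
```
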
If the header is missing, we ask the LLM to generate a list of column names, providing some context such as the package and data stream titles.

Should the header or the LLM suggestion prove unsuitable for a specific column, we fall back to `column1`, `column2`, and so on. To avoid duplicate column names, we append suffixes such as `_2` as necessary.
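
These naming rules can be sketched as follows (a hypothetical helper for illustration, not the code from this PR):

```ts
// Build final column names from header values or LLM suggestions.
function toColumnNames(candidates: Array<string | undefined>): string[] {
  const used = new Set<string>();
  return candidates.map((candidate, index) => {
    // Fall back to `column1`, `column2`, ... when no usable name exists.
    const base = candidate?.trim() || `column${index + 1}`;
    // Append `_2`, `_3`, ... to avoid duplicate column names.
    let name = base;
    for (let n = 2; used.has(name); n++) {
      name = `${base}_${n}`;
    }
    used.add(name);
    return name;
  });
}

// toColumnNames(['ip', 'ip', undefined]) -> ['ip', 'ip_2', 'column3']
```
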
If the format appears to be CSV but the `csv` processor fails, we bubble up an error using the recently introduced `ErrorThatHandlesItsOwnResponse` class. This also provides the first example of passing additional attributes of an error (in this case, the original CSV error) back to the client; the error message is composed on the client side.

**Removed: supported formats message**

The message that asked the user to upload logs in `JSON/NDJSON format` is removed in this PR:

<img width="741" alt="image" src="https://github.com/user-attachments/assets/34d571c3-b12c-44a1-98e3-d7549160be12">

**Refactoring**

The refactoring makes the "→JSON" conversion process more uniform across the different chains and centralizes processor definitions in `.../server/util/processors.ts`.

The log format chain now expects the LLM to follow the `SamplesFormat` schema when providing the information, rather than an ad-hoc format.

When testing, the `fail` method is [not supported in `jest`](https://stackoverflow.com/a/54244479/23968144), so it is removed.

See the PR for examples and follow-up.

Co-authored-by: Elastic Machine <[email protected]>

Co-authored-by: Ilya Nikokoshev <[email protected]>
kibanamachine and ilyannn authored Oct 14, 2024
1 parent 7a80e6f commit 6378ff3
Showing 47 changed files with 853 additions and 132 deletions.
@@ -14,6 +14,8 @@ export const logFormatDetectionTestState = {
exAnswer: 'testanswer',
packageName: 'testPackage',
dataStreamName: 'testDatastream',
packageTitle: 'Test Title',
dataStreamTitle: 'Test Datastream Title',
finalized: false,
samplesFormat: { name: SamplesFormatName.Values.structured },
header: true,
@@ -19,6 +19,8 @@ import { z } from '@kbn/zod';
import {
PackageName,
DataStreamName,
PackageTitle,
DataStreamTitle,
LogSamples,
Connector,
LangSmithOptions,
@@ -29,6 +31,8 @@ export type AnalyzeLogsRequestBody = z.infer<typeof AnalyzeLogsRequestBody>;
export const AnalyzeLogsRequestBody = z.object({
packageName: PackageName,
dataStreamName: DataStreamName,
packageTitle: PackageTitle,
dataStreamTitle: DataStreamTitle,
logSamples: LogSamples,
connectorId: Connector,
langSmithOptions: LangSmithOptions.optional(),
@@ -22,11 +22,17 @@ paths:
- connectorId
- packageName
- dataStreamName
- packageTitle
- dataStreamTitle
properties:
packageName:
$ref: "../model/common_attributes.schema.yaml#/components/schemas/PackageName"
dataStreamName:
$ref: "../model/common_attributes.schema.yaml#/components/schemas/DataStreamName"
packageTitle:
$ref: "../model/common_attributes.schema.yaml#/components/schemas/PackageTitle"
dataStreamTitle:
$ref: "../model/common_attributes.schema.yaml#/components/schemas/DataStreamTitle"
logSamples:
$ref: "../model/common_attributes.schema.yaml#/components/schemas/LogSamples"
connectorId:
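
For illustration, a request body that satisfies the updated schema could look like this (all values are hypothetical):

```yaml
packageName: my_package
dataStreamName: my_data_stream
packageTitle: My Package
dataStreamTitle: My Data Stream
logSamples:
  - "2024-10-14T10:24:58Z,203.0.113.7,login failed"
connectorId: my-connector-id
```
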
@@ -0,0 +1,41 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import type { GenerationErrorCode } from '../constants';

// Errors raised by the generation process should provide information through this interface.
export interface GenerationErrorBody {
message: string;
attributes: GenerationErrorAttributes;
}

export function isGenerationErrorBody(obj: unknown | undefined): obj is GenerationErrorBody {
return (
typeof obj === 'object' &&
obj !== null &&
'message' in obj &&
typeof obj.message === 'string' &&
'attributes' in obj &&
obj.attributes !== undefined &&
isGenerationErrorAttributes(obj.attributes)
);
}

export interface GenerationErrorAttributes {
errorCode: GenerationErrorCode;
underlyingMessages: string[] | undefined;
}

export function isGenerationErrorAttributes(obj: unknown): obj is GenerationErrorAttributes {
return (
typeof obj === 'object' &&
obj !== null &&
'errorCode' in obj &&
typeof obj.errorCode === 'string' &&
(!('underlyingMessages' in obj) || Array.isArray(obj.underlyingMessages))
);
}
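
As an illustration of the interface and guards above, a client receiving an unknown error body could narrow it like this (the body shown is a hypothetical example):

```ts
// A hypothetical error body returned by the generation endpoint.
const body: unknown = {
  message: 'Cannot parse the samples',
  attributes: {
    errorCode: 'unparseable-csv-data',
    underlyingMessages: ['Mismatched quote in row 3'],
  },
};

if (isGenerationErrorBody(body)) {
  // Narrowed to GenerationErrorBody: attributes are now typed.
  console.log(body.attributes.errorCode, body.attributes.underlyingMessages);
}
```
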
@@ -96,6 +96,8 @@ export const getRelatedRequestMock = (): RelatedRequestBody => ({
export const getAnalyzeLogsRequestBody = (): AnalyzeLogsRequestBody => ({
dataStreamName: 'test-data-stream-name',
packageName: 'test-package-name',
packageTitle: 'Test package title',
dataStreamTitle: 'Test data stream title',
connectorId: 'test-connector-id',
logSamples: rawSamples,
});
@@ -31,6 +31,18 @@ export const PackageName = z.string().min(1);
export type DataStreamName = z.infer<typeof DataStreamName>;
export const DataStreamName = z.string().min(1);

/**
* Package title for the integration to be built.
*/
export type PackageTitle = z.infer<typeof PackageTitle>;
export const PackageTitle = z.string().min(1);

/**
* DataStream title for the integration to be built.
*/
export type DataStreamTitle = z.infer<typeof DataStreamTitle>;
export const DataStreamTitle = z.string().min(1);

/**
* String form of the input logsamples.
*/
@@ -86,6 +98,14 @@ export const SamplesFormat = z.object({
* For some formats, specifies whether the samples can be multiline.
*/
multiline: z.boolean().optional(),
/**
* For CSV format, specifies whether the samples have a header row. For other formats, specifies the presence of header in each row.
*/
header: z.boolean().optional(),
/**
* For CSV format, specifies the column names proposed by the LLM.
*/
columns: z.array(z.string()).optional(),
/**
* For a JSON format, describes how to get to the sample array from the root of the JSON.
*/
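
For illustration, a value the log format chain might now return for CSV samples with a header row (assuming `csv` is among the `SamplesFormatName` values; field values are hypothetical):

```ts
const csvSamplesFormat = SamplesFormat.parse({
  name: 'csv',
  header: true,
  // Column names taken from the header (or proposed by the LLM).
  columns: ['ts', 'src_ip', 'msg'],
});
```
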
@@ -16,6 +16,16 @@ components:
minLength: 1
description: DataStream name for the integration to be built.

PackageTitle:
type: string
minLength: 1
description: Package title for the integration to be built.

DataStreamTitle:
type: string
minLength: 1
description: DataStream title for the integration to be built.

LogSamples:
type: array
items:
@@ -66,6 +76,14 @@ components:
multiline:
type: boolean
description: For some formats, specifies whether the samples can be multiline.
header:
type: boolean
description: For CSV format, specifies whether the samples have a header row. For other formats, specifies the presence of header in each row.
columns:
type: array
description: For CSV format, specifies the column names proposed by the LLM.
items:
type: string
json_path:
type: array
description: For a JSON format, describes how to get to the sample array from the root of the JSON.
3 changes: 2 additions & 1 deletion x-pack/plugins/integration_assistant/common/constants.ts
@@ -30,8 +30,9 @@ export const MINIMUM_LICENSE_TYPE: LicenseType = 'enterprise';

// ErrorCodes

export enum ErrorCode {
export enum GenerationErrorCode {
RECURSION_LIMIT = 'recursion-limit',
RECURSION_LIMIT_ANALYZE_LOGS = 'recursion-limit-analyze-logs',
UNSUPPORTED_LOG_SAMPLES_FORMAT = 'unsupported-log-samples-format',
UNPARSEABLE_CSV_DATA = 'unparseable-csv-data',
}
3 changes: 1 addition & 2 deletions x-pack/plugins/integration_assistant/common/index.ts
@@ -27,10 +27,9 @@ export type {
Integration,
Pipeline,
Docs,
SamplesFormat,
LangSmithOptions,
} from './api/model/common_attributes.gen';
export { SamplesFormatName } from './api/model/common_attributes.gen';
export { SamplesFormat, SamplesFormatName } from './api/model/common_attributes.gen';
export type { ESProcessorItem } from './api/model/processor_attributes.gen';
export type { CelInput } from './api/model/cel_input_attributes.gen';

@@ -105,6 +105,8 @@ describe('GenerationModal', () => {
it('should call runAnalyzeLogsGraph with correct parameters', () => {
expect(mockRunAnalyzeLogsGraph).toHaveBeenCalledWith({
...defaultRequest,
packageTitle: 'Mocked Integration title',
dataStreamTitle: 'Mocked Data Stream Title',
logSamples: integrationSettingsNonJSON.logSamples ?? [],
});
});
@@ -82,7 +82,7 @@ export const GenerationModal = React.memo<GenerationModalProps>(
{error ? (
<EuiFlexItem>
<EuiCallOut
title={i18n.GENERATION_ERROR(progressText[progress])}
title={i18n.GENERATION_ERROR_TITLE(progressText[progress])}
color="danger"
iconType="alert"
data-test-subj="generationErrorCallout"
@@ -318,9 +318,6 @@ export const SampleLogsInput = React.memo<SampleLogsInputProps>(({ integrationSe
<EuiText size="s" textAlign="center">
{i18n.LOGS_SAMPLE_DESCRIPTION}
</EuiText>
<EuiText size="xs" color="subdued" textAlign="center">
{i18n.LOGS_SAMPLE_DESCRIPTION_2}
</EuiText>
</>
}
onChange={onChangeLogsSample}
@@ -6,7 +6,8 @@
*/

import { i18n } from '@kbn/i18n';
import { ErrorCode } from '../../../../../../common/constants';
import { GenerationErrorCode } from '../../../../../../common/constants';
import type { GenerationErrorAttributes } from '../../../../../../common/api/generation_error';

export const INTEGRATION_NAME_TITLE = i18n.translate(
'xpack.integrationAssistant.step.dataStream.integrationNameTitle',
@@ -109,12 +110,6 @@ export const LOGS_SAMPLE_DESCRIPTION = i18n.translate(
defaultMessage: 'Drag and drop a file or Browse files.',
}
);
export const LOGS_SAMPLE_DESCRIPTION_2 = i18n.translate(
'xpack.integrationAssistant.step.dataStream.logsSample.description2',
{
defaultMessage: 'JSON/NDJSON format',
}
);
export const LOGS_SAMPLE_TRUNCATED = (maxRows: number) =>
i18n.translate('xpack.integrationAssistant.step.dataStream.logsSample.truncatedWarning', {
values: { maxRows },
@@ -188,7 +183,7 @@ export const PROGRESS_RELATED_GRAPH = i18n.translate(
defaultMessage: 'Generating related fields',
}
);
export const GENERATION_ERROR = (progressStep: string) =>
export const GENERATION_ERROR_TITLE = (progressStep: string) =>
i18n.translate('xpack.integrationAssistant.step.dataStream.generationError', {
values: { progressStep },
defaultMessage: 'An error occurred during: {progressStep}',
@@ -198,24 +193,44 @@ export const RETRY = i18n.translate('xpack.integrationAssistant.step.dataStream.
defaultMessage: 'Retry',
});

export const ERROR_TRANSLATION: Record<ErrorCode, string> = {
[ErrorCode.RECURSION_LIMIT_ANALYZE_LOGS]: i18n.translate(
export const GENERATION_ERROR_TRANSLATION: Record<
GenerationErrorCode,
string | ((attributes: GenerationErrorAttributes) => string)
> = {
[GenerationErrorCode.RECURSION_LIMIT_ANALYZE_LOGS]: i18n.translate(
'xpack.integrationAssistant.errors.recursionLimitAnalyzeLogsErrorMessage',
{
defaultMessage:
'Please verify the format of log samples is correct and try again. Try with a fewer samples if error persists.',
}
),
[ErrorCode.RECURSION_LIMIT]: i18n.translate(
[GenerationErrorCode.RECURSION_LIMIT]: i18n.translate(
'xpack.integrationAssistant.errors.recursionLimitReached',
{
defaultMessage: 'Max attempts exceeded. Please try again.',
}
),
[ErrorCode.UNSUPPORTED_LOG_SAMPLES_FORMAT]: i18n.translate(
[GenerationErrorCode.UNSUPPORTED_LOG_SAMPLES_FORMAT]: i18n.translate(
'xpack.integrationAssistant.errors.unsupportedLogSamples',
{
defaultMessage: 'Unsupported log format in the samples.',
}
),
[GenerationErrorCode.UNPARSEABLE_CSV_DATA]: (attributes) => {
if (
attributes.underlyingMessages !== undefined &&
attributes.underlyingMessages?.length !== 0
) {
return i18n.translate('xpack.integrationAssistant.errors.uparseableCSV.withReason', {
values: {
reason: attributes.underlyingMessages[0],
},
defaultMessage: `Cannot parse the samples as the CSV data (reason: {reason}). Please check the provided samples.`,
});
} else {
return i18n.translate('xpack.integrationAssistant.errors.uparseableCSV.withoutReason', {
defaultMessage: `Cannot parse the samples as the CSV data. Please check the provided samples.`,
});
}
},
};
@@ -16,6 +16,7 @@ import {
type EcsMappingRequestBody,
type RelatedRequestBody,
} from '../../../../../../common';
import { isGenerationErrorBody } from '../../../../../../common/api/generation_error';
import {
runCategorizationGraph,
runEcsGraph,
@@ -26,7 +27,6 @@ import { useKibana } from '../../../../../common/hooks/use_kibana';
import type { State } from '../../state';
import * as i18n from './translations';
import { useTelemetry } from '../../../telemetry';
import type { ErrorCode } from '../../../../../../common/constants';
import type { AIConnector, IntegrationSettings } from '../../types';

export type OnComplete = (result: State['result']) => void;
@@ -46,6 +46,18 @@ interface RunGenerationProps {
setProgress: (progress: ProgressItem) => void;
}

// If the result is classified as a generation error, produce an error message
// as defined in the i18n file. Otherwise, return undefined.
function generationErrorMessage(body: unknown | undefined): string | undefined {
if (!isGenerationErrorBody(body)) {
return;
}

const errorCode = body.attributes.errorCode;
const translation = i18n.GENERATION_ERROR_TRANSLATION[errorCode];
return typeof translation === 'function' ? translation(body.attributes) : translation;
}

interface GenerationResults {
pipeline: Pipeline;
docs: Docs;
@@ -96,12 +108,7 @@ export const useGeneration = ({
error: originalErrorMessage,
});

let errorMessage = originalErrorMessage;
const errorCode = e.body?.attributes?.errorCode as ErrorCode | undefined;
if (errorCode != null) {
errorMessage = i18n.ERROR_TRANSLATION[errorCode];
}
setError(errorMessage);
setError(generationErrorMessage(e.body) ?? originalErrorMessage);
} finally {
setIsRequesting(false);
}
@@ -145,6 +152,9 @@
const analyzeLogsRequest: AnalyzeLogsRequestBody = {
packageName: integrationSettings.name ?? '',
dataStreamName: integrationSettings.dataStreamName ?? '',
packageTitle: integrationSettings.title ?? integrationSettings.name ?? '',
dataStreamTitle:
integrationSettings.dataStreamTitle ?? integrationSettings.dataStreamName ?? '',
logSamples: integrationSettings.logSamples ?? [],
connectorId: connector.id,
langSmithOptions: getLangSmithOptions(),