diff --git a/x-pack/plugins/search_playground/__mocks__/fetch_query_source_fields.mock.ts b/x-pack/plugins/search_playground/__mocks__/fetch_query_source_fields.mock.ts index b25ca903c7a4b..d421ad6c8c9b7 100644 --- a/x-pack/plugins/search_playground/__mocks__/fetch_query_source_fields.mock.ts +++ b/x-pack/plugins/search_playground/__mocks__/fetch_query_source_fields.mock.ts @@ -5,7 +5,177 @@ * 2.0. */ -import { SearchResponse } from '@elastic/elasticsearch/lib/api/types'; +import { IndicesGetMappingResponse, SearchResponse } from '@elastic/elasticsearch/lib/api/types'; + +export const SPARSE_SEMANTIC_FIELD_FIELD_CAPS = { + indices: ['test-index2'], + fields: { + infer_field: { + semantic_text: { + type: 'semantic_text', + metadata_field: false, + searchable: false, + aggregatable: false, + }, + }, + 'infer_field.inference.chunks.embeddings': { + sparse_vector: { + type: 'sparse_vector', + metadata_field: false, + searchable: true, + aggregatable: false, + }, + }, + non_infer_field: { + text: { + type: 'text', + metadata_field: false, + searchable: true, + aggregatable: false, + }, + }, + 'infer_field.inference.chunks.text': { + keyword: { + type: 'keyword', + metadata_field: false, + searchable: false, + aggregatable: false, + }, + }, + 'infer_field.inference': { + object: { + type: 'object', + metadata_field: false, + searchable: false, + aggregatable: false, + }, + }, + 'infer_field.inference.chunks': { + nested: { + type: 'nested', + metadata_field: false, + searchable: false, + aggregatable: false, + }, + }, + }, +}; + +export const SPARSE_SEMANTIC_FIELD_MAPPINGS = { + 'test-index2': { + mappings: { + properties: { + infer_field: { + type: 'semantic_text', + inference_id: 'elser-endpoint', + model_settings: { + task_type: 'sparse_embedding', + }, + }, + non_infer_field: { + type: 'text', + }, + }, + }, + }, +} as any as IndicesGetMappingResponse; + +export const DENSE_SEMANTIC_FIELD_MAPPINGS = { + 'test-index2': { + mappings: { + properties: { + infer_field: { + type: 'semantic_text', + inference_id: 'cohere', + model_settings: { + task_type: 'text_embedding', + dimensions: 1536, + similarity: 'dot_product', + }, + }, + non_infer_field: { + type: 'text', + }, + }, + }, + }, +} as any as IndicesGetMappingResponse; + +// for when semantic_text field hasn't been mapped with task_type +// when theres no data / no inference has been performed in the field +export const DENSE_SEMANTIC_FIELD_MAPPINGS_MISSING_TASK_TYPE = { + 'test-index2': { + mappings: { + properties: { + infer_field: { + type: 'semantic_text', + inference_id: 'cohere', + model_settings: { + dimensions: 1536, + similarity: 'dot_product', + }, + }, + non_infer_field: { + type: 'text', + }, + }, + }, + }, +} as any as IndicesGetMappingResponse; + +export const DENSE_SEMANTIC_FIELD_FIELD_CAPS = { + indices: ['test-index2'], + fields: { + infer_field: { + semantic_text: { + type: 'semantic_text', + metadata_field: false, + searchable: false, + aggregatable: false, + }, + }, + 'infer_field.inference.chunks.embeddings': { + sparse_vector: { + type: 'dense_vector', + metadata_field: false, + searchable: true, + aggregatable: false, + }, + }, + non_infer_field: { + text: { + type: 'text', + metadata_field: false, + searchable: true, + aggregatable: false, + }, + }, + 'infer_field.inference.chunks.text': { + keyword: { + type: 'keyword', + metadata_field: false, + searchable: false, + aggregatable: false, + }, + }, + 'infer_field.inference': { + object: { + type: 'object', + metadata_field: false, + searchable: false, + aggregatable: false, + }, + }, + 'infer_field.inference.chunks': { + nested: { + type: 'nested', + metadata_field: false, + searchable: false, + aggregatable: false, + }, + }, + }, +}; export const DENSE_SPARSE_SAME_FIELD_NAME_CAPS = { indices: ['cohere-embeddings', 'elser_index'], diff --git a/x-pack/plugins/search_playground/common/types.ts b/x-pack/plugins/search_playground/common/types.ts index c788b2df1896f..00bfde68ec2ea 100644 --- a/x-pack/plugins/search_playground/common/types.ts +++ b/x-pack/plugins/search_playground/common/types.ts @@ -7,18 +7,25 @@ export type IndicesQuerySourceFields = Record; -interface ModelFields { +interface ModelField { field: string; model_id: string; - nested: boolean; + indices: string[]; +} + +interface SemanticField { + field: string; + inferenceId: string; + embeddingType: 'sparse_vector' | 'dense_vector'; indices: string[]; } export interface QuerySourceFields { - elser_query_fields: ModelFields[]; - dense_vector_query_fields: ModelFields[]; + elser_query_fields: ModelField[]; + dense_vector_query_fields: ModelField[]; bm25_query_fields: string[]; source_fields: string[]; + semantic_fields: SemanticField[]; skipped_fields: number; } diff --git a/x-pack/plugins/search_playground/public/components/edit_context/edit_context_flyout.test.tsx b/x-pack/plugins/search_playground/public/components/edit_context/edit_context_flyout.test.tsx index 4e5bb7f807900..de82892b167be 100644 --- a/x-pack/plugins/search_playground/public/components/edit_context/edit_context_flyout.test.tsx +++ b/x-pack/plugins/search_playground/public/components/edit_context/edit_context_flyout.test.tsx @@ -19,12 +19,14 @@ jest.mock('../../hooks/use_indices_fields', () => ({ dense_vector_query_fields: [], bm25_query_fields: ['field1', 'field2'], source_fields: ['context_field1', 'context_field2'], + semantic_fields: [], }, index2: { elser_query_fields: [], dense_vector_query_fields: [], bm25_query_fields: ['field1', 'field2'], source_fields: ['context_field1', 'context_field2'], + semantic_fields: [], }, }, }), diff --git a/x-pack/plugins/search_playground/public/components/view_query/view_query_flyout.test.tsx b/x-pack/plugins/search_playground/public/components/view_query/view_query_flyout.test.tsx index 410989eaf52ad..39136e2557296 100644 --- a/x-pack/plugins/search_playground/public/components/view_query/view_query_flyout.test.tsx +++ b/x-pack/plugins/search_playground/public/components/view_query/view_query_flyout.test.tsx @@ -10,6 +10,7 @@ import { render, fireEvent, screen } from '@testing-library/react'; import { ViewQueryFlyout } from './view_query_flyout'; import { FormProvider, useForm } from 'react-hook-form'; import { __IntlProvider as IntlProvider } from '@kbn/i18n-react'; +import { ChatFormFields } from '../../types'; jest.mock('../../hooks/use_indices_fields', () => ({ useIndicesFields: () => ({ @@ -19,12 +20,14 @@ jest.mock('../../hooks/use_indices_fields', () => ({ dense_vector_query_fields: [], bm25_query_fields: ['field1', 'field2'], skipped_fields: 1, + semantic_fields: [], }, index2: { elser_query_fields: [], dense_vector_query_fields: [], bm25_query_fields: ['field1', 'field2'], skipped_fields: 0, + semantic_fields: [], }, }, }), @@ -41,7 +44,11 @@ jest.mock('../../hooks/use_usage_tracker', () => ({ const MockFormProvider = ({ children }: { children: React.ReactElement }) => { const methods = useForm({ values: { - indices: ['index1', 'index2'], + [ChatFormFields.indices]: ['index1', 'index2'], + [ChatFormFields.sourceFields]: { + index1: ['field1'], + index2: ['field1'], + }, }, }); return {children}; diff --git a/x-pack/plugins/search_playground/public/components/view_query/view_query_flyout.tsx b/x-pack/plugins/search_playground/public/components/view_query/view_query_flyout.tsx index 64139307de4ba..2fd64f073eac7 100644 --- a/x-pack/plugins/search_playground/public/components/view_query/view_query_flyout.tsx +++ b/x-pack/plugins/search_playground/public/components/view_query/view_query_flyout.tsx @@ -65,6 +65,12 @@ const groupTypeQueryFields = ( typeQueryFields += (typeQueryFields ? '_' : '') + 'SPARSE'; } + if ( + selectedFields.some((field) => indexFields.semantic_fields.find((f) => f.field === field)) + ) { + typeQueryFields += (typeQueryFields ? '_' : '') + 'SEMANTIC'; + } + return typeQueryFields; }); @@ -76,6 +82,7 @@ export const ViewQueryFlyout: React.FC = ({ onClose }) => const usageTracker = useUsageTracker(); const { getValues } = useFormContext(); const selectedIndices: string[] = getValues(ChatFormFields.indices); + const sourceFields = getValues(ChatFormFields.sourceFields); const { fields } = useIndicesFields(selectedIndices); const defaultFields = getDefaultQueryFields(fields); @@ -111,7 +118,7 @@ export const ViewQueryFlyout: React.FC = ({ onClose }) => const saveQuery = () => { queryFieldsOnChange(tempQueryFields); - elasticsearchQueryChange(createQuery(tempQueryFields, fields)); + elasticsearchQueryChange(createQuery(tempQueryFields, sourceFields, fields)); onClose(); const groupedQueryFields = groupTypeQueryFields(fields, tempQueryFields); @@ -168,7 +175,7 @@ export const ViewQueryFlyout: React.FC = ({ onClose }) => lineNumbers data-test-subj="ViewElasticsearchQueryResult" > - {JSON.stringify(createQuery(tempQueryFields, fields), null, 2)} + {JSON.stringify(createQuery(tempQueryFields, sourceFields, fields), null, 2)} @@ -198,6 +205,7 @@ export const ViewQueryFlyout: React.FC = ({ onClose }) => aria-label="Select query fields" data-test-subj={`queryFieldsSelectable_${index}`} options={[ + ...group.semantic_fields, ...group.elser_query_fields, ...group.dense_vector_query_fields, ...group.bm25_query_fields, diff --git a/x-pack/plugins/search_playground/public/hooks/use_source_indices_field.ts b/x-pack/plugins/search_playground/public/hooks/use_source_indices_field.ts index 342be7c191778..bc9a37060fb6f 100644 --- a/x-pack/plugins/search_playground/public/hooks/use_source_indices_field.ts +++ b/x-pack/plugins/search_playground/public/hooks/use_source_indices_field.ts @@ -89,7 +89,7 @@ export const useSourceIndicesFields = () => { setNoFieldsIndicesWarning(null); } - onElasticsearchQueryChange(createQuery(defaultFields, fields)); + onElasticsearchQueryChange(createQuery(defaultFields, defaultSourceFields, fields)); onSourceFieldsChange(defaultSourceFields); usageTracker?.count( AnalyticsEvents.sourceFieldsLoaded, diff --git a/x-pack/plugins/search_playground/public/hooks/use_source_indices_fields.test.tsx b/x-pack/plugins/search_playground/public/hooks/use_source_indices_fields.test.tsx index f3b19e8d4360e..7dd1a43d6fc01 100644 --- a/x-pack/plugins/search_playground/public/hooks/use_source_indices_fields.test.tsx +++ b/x-pack/plugins/search_playground/public/hooks/use_source_indices_fields.test.tsx @@ -36,7 +36,6 @@ describe.skip('useSourceIndicesFields Hook', () => { { field: 'field1', model_id: 'model1', - nested: false, indices: ['newIndex'], }, ], @@ -44,6 +43,7 @@ describe.skip('useSourceIndicesFields Hook', () => { bm25_query_fields: [], source_fields: ['field1'], skipped_fields: 0, + semantic_fields: [], }, }; @@ -87,11 +87,11 @@ describe.skip('useSourceIndicesFields Hook', () => { expect(result.current.indices).toEqual([]); expect(getValues()).toMatchInlineSnapshot(` Object { - "doc_size": 5, + "doc_size": 3, "elasticsearch_query": Object {}, "indices": Array [], "prompt": "You are an assistant for question-answering tasks.", - "source_fields": Array [], + "source_fields": Object {}, } `); result.current.addIndex('newIndex'); @@ -109,16 +109,15 @@ describe.skip('useSourceIndicesFields Hook', () => { expect(result.current.loading).toBe(false); expect(getValues()).toMatchInlineSnapshot(` Object { - "doc_size": 5, + "doc_size": 3, "elasticsearch_query": Object { "retriever": Object { "standard": Object { "query": Object { - "text_expansion": Object { - "field1": Object { - "model_id": "model1", - "model_text": "{query}", - }, + "sparse_vector": Object { + "field": "field1", + "inference_id": "model1", + "query": "{query}", }, }, }, @@ -146,6 +145,7 @@ describe.skip('useSourceIndicesFields Hook', () => { bm25_query_fields: [], source_fields: [], skipped_fields: 0, + semantic_fields: [], }, }; @@ -169,7 +169,7 @@ describe.skip('useSourceIndicesFields Hook', () => { expect(result.current.loading).toBe(false); expect(getValues()).toMatchInlineSnapshot(` Object { - "doc_size": 5, + "doc_size": 3, "elasticsearch_query": Object { "retriever": Object { "standard": Object { @@ -199,6 +199,7 @@ describe.skip('useSourceIndicesFields Hook', () => { bm25_query_fields: [], source_fields: [], skipped_fields: 0, + semantic_fields: [], }, }; @@ -222,7 +223,7 @@ describe.skip('useSourceIndicesFields Hook', () => { expect(result.current.loading).toBe(false); expect(getValues()).toMatchInlineSnapshot(` Object { - "doc_size": 5, + "doc_size": 3, "elasticsearch_query": Object { "retriever": Object { "standard": Object { diff --git a/x-pack/plugins/search_playground/public/utils/create_query.test.ts b/x-pack/plugins/search_playground/public/utils/create_query.test.ts index 282326f0991d2..164f79618d74c 100644 --- a/x-pack/plugins/search_playground/public/utils/create_query.test.ts +++ b/x-pack/plugins/search_playground/public/utils/create_query.test.ts @@ -9,6 +9,8 @@ import { IndicesQuerySourceFields } from '../types'; import { createQuery, getDefaultQueryFields, getDefaultSourceFields } from './create_query'; describe('create_query', () => { + const sourceFields = { index1: [], index2: [] }; + describe('createQuery', () => { it('should return a sparse single query', () => { const fields = { @@ -17,25 +19,23 @@ describe('create_query', () => { const fieldDescriptors: IndicesQuerySourceFields = { index1: { - elser_query_fields: [ - { field: 'field1', model_id: 'model1', nested: false, indices: ['index1'] }, - ], + elser_query_fields: [{ field: 'field1', model_id: 'model1', indices: ['index1'] }], dense_vector_query_fields: [], bm25_query_fields: [], source_fields: [], skipped_fields: 0, + semantic_fields: [], }, }; - expect(createQuery(fields, fieldDescriptors)).toEqual({ + expect(createQuery(fields, sourceFields, fieldDescriptors)).toEqual({ retriever: { standard: { query: { - text_expansion: { - field1: { - model_id: 'model1', - model_text: '{query}', - }, + sparse_vector: { + field: 'field1', + inference_id: 'model1', + query: '{query}', }, }, }, @@ -51,16 +51,15 @@ describe('create_query', () => { const fieldDescriptors: IndicesQuerySourceFields = { index1: { elser_query_fields: [], - dense_vector_query_fields: [ - { field: 'field1', model_id: 'model1', nested: false, indices: ['index1'] }, - ], + dense_vector_query_fields: [{ field: 'field1', model_id: 'model1', indices: ['index1'] }], bm25_query_fields: [], source_fields: [], skipped_fields: 0, + semantic_fields: [], }, }; - expect(createQuery(fields, fieldDescriptors)).toEqual({ + expect(createQuery(fields, sourceFields, fieldDescriptors)).toEqual({ retriever: { standard: { query: { @@ -89,33 +88,34 @@ describe('create_query', () => { const fieldDescriptors: IndicesQuerySourceFields = { index1: { elser_query_fields: [ - { field: 'field1', model_id: 'model1', nested: false, indices: ['index1', 'index2'] }, + { field: 'field1', model_id: 'model1', indices: ['index1', 'index2'] }, ], dense_vector_query_fields: [], bm25_query_fields: [], source_fields: [], skipped_fields: 0, + semantic_fields: [], }, index2: { elser_query_fields: [ - { field: 'field1', model_id: 'model1', nested: false, indices: ['index1', 'index2'] }, + { field: 'field1', model_id: 'model1', indices: ['index1', 'index2'] }, ], dense_vector_query_fields: [], bm25_query_fields: [], source_fields: [], skipped_fields: 0, + semantic_fields: [], }, }; - expect(createQuery(fields, fieldDescriptors)).toEqual({ + expect(createQuery(fields, sourceFields, fieldDescriptors)).toEqual({ retriever: { standard: { query: { - text_expansion: { - field1: { - model_id: 'model1', - model_text: '{query}', - }, + sparse_vector: { + field: 'field1', + inference_id: 'model1', + query: '{query}', }, }, }, @@ -131,37 +131,34 @@ describe('create_query', () => { const fieldDescriptors: IndicesQuerySourceFields = { index1: { - elser_query_fields: [ - { field: 'field1', model_id: 'model1', nested: false, indices: ['index1'] }, - ], + elser_query_fields: [{ field: 'field1', model_id: 'model1', indices: ['index1'] }], dense_vector_query_fields: [], bm25_query_fields: [], source_fields: [], skipped_fields: 0, + semantic_fields: [], }, index2: { - elser_query_fields: [ - { field: 'field2', model_id: 'model1', nested: false, indices: ['index2'] }, - ], + elser_query_fields: [{ field: 'field2', model_id: 'model1', indices: ['index2'] }], dense_vector_query_fields: [], bm25_query_fields: [], source_fields: [], skipped_fields: 0, + semantic_fields: [], }, }; - expect(createQuery(fields, fieldDescriptors)).toEqual({ + expect(createQuery(fields, sourceFields, fieldDescriptors)).toEqual({ retriever: { rrf: { retrievers: [ { standard: { query: { - text_expansion: { - field1: { - model_id: 'model1', - model_text: '{query}', - }, + sparse_vector: { + field: 'field1', + inference_id: 'model1', + query: '{query}', }, }, }, @@ -169,11 +166,10 @@ describe('create_query', () => { { standard: { query: { - text_expansion: { - field2: { - model_id: 'model1', - model_text: '{query}', - }, + sparse_vector: { + field: 'field2', + inference_id: 'model1', + query: '{query}', }, }, }, @@ -184,72 +180,6 @@ describe('create_query', () => { }); }); - it('should return empty for nested dense query', () => { - const fields = { - index1: ['passages.field1.predicted_value'], - }; - - const fieldDescriptors: IndicesQuerySourceFields = { - index1: { - elser_query_fields: [], - dense_vector_query_fields: [ - { - field: 'passages.field1.predicted_value', - model_id: 'model1', - nested: true, - indices: ['index1'], - }, - ], - bm25_query_fields: [], - source_fields: [], - skipped_fields: 0, - }, - }; - - expect(createQuery(fields, fieldDescriptors)).toEqual({ - retriever: { - standard: { - query: { - match_all: {}, - }, - }, - }, - }); - }); - - it('should return empty for nested sparse query', () => { - const fields = { - index1: ['passages.field1.tokens'], - }; - - const fieldDescriptors: IndicesQuerySourceFields = { - index1: { - elser_query_fields: [ - { - field: 'passages.field1.tokens', - model_id: 'model1', - nested: true, - indices: ['index1'], - }, - ], - dense_vector_query_fields: [], - bm25_query_fields: [], - source_fields: [], - skipped_fields: 0, - }, - }; - - expect(createQuery(fields, fieldDescriptors)).toEqual({ - retriever: { - standard: { - query: { - match_all: {}, - }, - }, - }, - }); - }); - describe('hybrid without RRF', () => { it('should return a hybrid query', () => { const fields = { @@ -259,37 +189,34 @@ describe('create_query', () => { const fieldDescriptors: IndicesQuerySourceFields = { index1: { - elser_query_fields: [ - { field: 'field1', model_id: 'model1', nested: false, indices: ['index1'] }, - ], + elser_query_fields: [{ field: 'field1', model_id: 'model1', indices: ['index1'] }], dense_vector_query_fields: [], bm25_query_fields: ['content', 'title'], source_fields: [], skipped_fields: 0, + semantic_fields: [], }, index2: { - elser_query_fields: [ - { field: 'field2', model_id: 'model1', nested: false, indices: ['index2'] }, - ], + elser_query_fields: [{ field: 'field2', model_id: 'model1', indices: ['index2'] }], dense_vector_query_fields: [], bm25_query_fields: [], source_fields: [], skipped_fields: 0, + semantic_fields: [], }, }; - expect(createQuery(fields, fieldDescriptors, { rrf: false })).toEqual({ + expect(createQuery(fields, sourceFields, fieldDescriptors, { rrf: false })).toEqual({ retriever: { standard: { query: { bool: { should: [ { - text_expansion: { - field1: { - model_id: 'model1', - model_text: '{query}', - }, + sparse_vector: { + field: 'field1', + inference_id: 'model1', + query: '{query}', }, }, { @@ -299,11 +226,10 @@ describe('create_query', () => { }, }, { - text_expansion: { - field2: { - model_id: 'model1', - model_text: '{query}', - }, + sparse_vector: { + field: 'field2', + inference_id: 'model1', + query: '{query}', }, }, ], @@ -325,37 +251,34 @@ describe('create_query', () => { const fieldDescriptors: IndicesQuerySourceFields = { index1: { - elser_query_fields: [ - { field: 'field1', model_id: 'model1', nested: false, indices: ['index1'] }, - ], + elser_query_fields: [{ field: 'field1', model_id: 'model1', indices: ['index1'] }], dense_vector_query_fields: [], bm25_query_fields: ['content', 'title'], source_fields: [], skipped_fields: 0, + semantic_fields: [], }, index2: { - elser_query_fields: [ - { field: 'field2', model_id: 'model1', nested: false, indices: ['index2'] }, - ], + elser_query_fields: [{ field: 'field2', model_id: 'model1', indices: ['index2'] }], dense_vector_query_fields: [], bm25_query_fields: [], source_fields: [], skipped_fields: 0, + semantic_fields: [], }, }; - expect(createQuery(fields, fieldDescriptors)).toEqual({ + expect(createQuery(fields, sourceFields, fieldDescriptors)).toEqual({ retriever: { rrf: { retrievers: [ { standard: { query: { - text_expansion: { - field1: { - model_id: 'model1', - model_text: '{query}', - }, + sparse_vector: { + field: 'field1', + inference_id: 'model1', + query: '{query}', }, }, }, @@ -373,11 +296,10 @@ describe('create_query', () => { { standard: { query: { - text_expansion: { - field2: { - model_id: 'model1', - model_text: '{query}', - }, + sparse_vector: { + field: 'field2', + inference_id: 'model1', + query: '{query}', }, }, }, @@ -397,25 +319,23 @@ describe('create_query', () => { const fieldDescriptors: IndicesQuerySourceFields = { index1: { elser_query_fields: [], - dense_vector_query_fields: [ - { field: 'field1', model_id: 'model1', nested: false, indices: ['index1'] }, - ], + dense_vector_query_fields: [{ field: 'field1', model_id: 'model1', indices: ['index1'] }], bm25_query_fields: ['content', 'title'], source_fields: [], skipped_fields: 0, + semantic_fields: [], }, index2: { - elser_query_fields: [ - { field: 'field2', model_id: 'model1', nested: false, indices: ['index2'] }, - ], + elser_query_fields: [{ field: 'field2', model_id: 'model1', indices: ['index2'] }], dense_vector_query_fields: [], bm25_query_fields: [], source_fields: [], skipped_fields: 0, + semantic_fields: [], }, }; - expect(createQuery(fields, fieldDescriptors)).toEqual({ + expect(createQuery(fields, sourceFields, fieldDescriptors)).toEqual({ retriever: { standard: { query: { @@ -444,16 +364,15 @@ describe('create_query', () => { const fieldDescriptors: IndicesQuerySourceFields = { index1: { elser_query_fields: [], - dense_vector_query_fields: [ - { field: 'field1', model_id: 'model1', nested: false, indices: ['index1'] }, - ], + dense_vector_query_fields: [{ field: 'field1', model_id: 'model1', indices: ['index1'] }], bm25_query_fields: ['content', 'title'], source_fields: [], skipped_fields: 0, + semantic_fields: [], }, }; - expect(createQuery(fields, fieldDescriptors)).toEqual({ + expect(createQuery(fields, sourceFields, fieldDescriptors)).toEqual({ retriever: { rrf: { retrievers: [ @@ -488,21 +407,262 @@ describe('create_query', () => { }, }); }); + + describe('semantic fields', () => { + describe('sparse_vector embedding', () => { + it('should return a query with semantic field, specified as a source field', () => { + // as the field is specified as a source field, it should use the nested query and manually calling the sparse_vector query + const fields = { + index1: ['field2', 'title', 'content'], + }; + + const fieldDescriptors: IndicesQuerySourceFields = { + index1: { + elser_query_fields: [], + dense_vector_query_fields: [ + { field: 'field1', model_id: 'model1', indices: ['index1'] }, + ], + bm25_query_fields: ['content', 'title'], + source_fields: [], + skipped_fields: 0, + semantic_fields: [ + { + field: 'field2', + inferenceId: 'model2', + indices: ['index1'], + embeddingType: 'sparse_vector', + }, + ], + }, + }; + + expect( + createQuery( + fields, + { + index1: ['field2'], + }, + fieldDescriptors + ) + ).toEqual({ + retriever: { + rrf: { + retrievers: [ + { + standard: { + query: { + nested: { + inner_hits: { _source: ['field2.inference.chunks.text'], size: 2 }, + path: 'field2.inference.chunks', + query: { + sparse_vector: { + field: 'field2.inference.chunks.embeddings', + inference_id: 'model2', + query: '{query}', + }, + }, + }, + }, + }, + }, + { + standard: { + query: { multi_match: { fields: ['title', 'content'], query: '{query}' } }, + }, + }, + ], + }, + }, + }); + }); + + it('should return a query with semantic field, specified not as a source field', () => { + // this should fallback to using the semantic field for querying + const fields = { + index1: ['field2', 'title', 'content'], + }; + + const fieldDescriptors: IndicesQuerySourceFields = { + index1: { + elser_query_fields: [], + dense_vector_query_fields: [ + { field: 'field1', model_id: 'model1', indices: ['index1'] }, + ], + bm25_query_fields: ['content', 'title'], + source_fields: [], + skipped_fields: 0, + semantic_fields: [ + { + field: 'field2', + inferenceId: 'model2', + indices: ['index1'], + embeddingType: 'sparse_vector', + }, + ], + }, + }; + + expect( + createQuery( + fields, + { + index1: ['content'], + }, + fieldDescriptors + ) + ).toEqual({ + retriever: { + rrf: { + retrievers: [ + { standard: { query: { semantic: { field: 'field2', query: '{query}' } } } }, + { + standard: { + query: { multi_match: { fields: ['title', 'content'], query: '{query}' } }, + }, + }, + ], + }, + }, + }); + }); + }); + + describe('dense embedding', () => { + it('should return a query with semantic field, specified as a source field', () => { + // as the field is specified as a source field, it should use the nested query and manually calling the knn query + const fields = { + index1: ['field2', 'title', 'content'], + }; + + const fieldDescriptors: IndicesQuerySourceFields = { + index1: { + elser_query_fields: [], + dense_vector_query_fields: [ + { field: 'field1', model_id: 'model1', indices: ['index1'] }, + ], + bm25_query_fields: ['content', 'title'], + source_fields: [], + skipped_fields: 0, + semantic_fields: [ + { + field: 'field2', + inferenceId: 'model2', + indices: ['index1'], + embeddingType: 'dense_vector', + }, + ], + }, + }; + + expect( + createQuery( + fields, + { + index1: ['field2'], + }, + fieldDescriptors + ) + ).toEqual({ + retriever: { + rrf: { + retrievers: [ + { + standard: { + query: { + nested: { + inner_hits: { _source: ['field2.inference.chunks.text'], size: 2 }, + path: 'field2.inference.chunks', + query: { + knn: { + field: 'field2.inference.chunks.embeddings', + query_vector_builder: { + text_embedding: { + model_id: 'model2', + model_text: '{query}', + }, + }, + }, + }, + }, + }, + }, + }, + { + standard: { + query: { multi_match: { fields: ['title', 'content'], query: '{query}' } }, + }, + }, + ], + }, + }, + }); + }); + + it('should return a query with semantic field, specified not as a source field', () => { + // this should fallback to using the semantic field for querying + const fields = { + index1: ['field2', 'title', 'content'], + }; + + const fieldDescriptors: IndicesQuerySourceFields = { + index1: { + elser_query_fields: [], + dense_vector_query_fields: [ + { field: 'field1', model_id: 'model1', indices: ['index1'] }, + ], + bm25_query_fields: ['content', 'title'], + source_fields: [], + skipped_fields: 0, + semantic_fields: [ + { + field: 'field2', + inferenceId: 'model2', + indices: ['index1'], + embeddingType: 'dense_vector', + }, + ], + }, + }; + + expect( + createQuery( + fields, + { + index1: ['content'], + }, + fieldDescriptors + ) + ).toEqual({ + retriever: { + rrf: { + retrievers: [ + { standard: { query: { semantic: { field: 'field2', query: '{query}' } } } }, + { + standard: { + query: { multi_match: { fields: ['title', 'content'], query: '{query}' } }, + }, + }, + ], + }, + }, + }); + }); + }); + }); }); describe('getDefaultQueryFields', () => { it('should return default ELSER query fields', () => { const fieldDescriptors: IndicesQuerySourceFields = { index1: { - elser_query_fields: [ - { field: 'field1', model_id: 'model1', nested: false, indices: ['index1'] }, - ], + elser_query_fields: [{ field: 'field1', model_id: 'model1', indices: ['index1'] }], dense_vector_query_fields: [ - { field: 'field1', model_id: 'dense_model', nested: false, indices: ['index1'] }, + { field: 'field1', model_id: 'dense_model', indices: ['index1'] }, ], bm25_query_fields: [], source_fields: [], skipped_fields: 0, + semantic_fields: [], }, }; @@ -512,36 +672,34 @@ describe('create_query', () => { it('should return default elser query fields for multiple indices', () => { const fieldDescriptors: IndicesQuerySourceFields = { index1: { - elser_query_fields: [ - { field: 'field1', model_id: 'model1', nested: false, indices: ['index1'] }, - ], + elser_query_fields: [{ field: 'field1', model_id: 'model1', indices: ['index1'] }], dense_vector_query_fields: [ { field: 'dv_field1', model_id: 'dense_model', - nested: false, + indices: ['index1', 'index2'], }, ], bm25_query_fields: [], source_fields: [], skipped_fields: 0, + semantic_fields: [], }, index2: { - elser_query_fields: [ - { field: 'vector', model_id: 'model1', nested: false, indices: ['index2'] }, - ], + elser_query_fields: [{ field: 'vector', model_id: 'model1', indices: ['index2'] }], dense_vector_query_fields: [ { field: 'dv_field1', model_id: 'dense_model', - nested: false, + indices: ['index1', 'index2'], }, ], bm25_query_fields: [], source_fields: [], skipped_fields: 0, + semantic_fields: [], }, }; @@ -554,36 +712,34 @@ describe('create_query', () => { it('should return elser query fields for default fields', () => { const fieldDescriptors: IndicesQuerySourceFields = { index1: { - elser_query_fields: [ - { field: 'field1', model_id: 'model1', nested: false, indices: ['index1'] }, - ], + elser_query_fields: [{ field: 'field1', model_id: 'model1', indices: ['index1'] }], dense_vector_query_fields: [ { field: 'dv_field1', model_id: 'dense_model', - nested: false, + indices: ['index1', 'index2'], }, ], bm25_query_fields: [], source_fields: [], skipped_fields: 0, + semantic_fields: [], }, index2: { - elser_query_fields: [ - { field: 'vector', model_id: 'model1', nested: false, indices: ['index2'] }, - ], + elser_query_fields: [{ field: 'vector', model_id: 'model1', indices: ['index2'] }], dense_vector_query_fields: [ { field: 'dv_field1', model_id: 'dense_model', - nested: false, + indices: ['index1', 'index2'], }, ], bm25_query_fields: [], source_fields: [], skipped_fields: 0, + semantic_fields: [], }, }; @@ -598,11 +754,12 @@ describe('create_query', () => { index1: { elser_query_fields: [], dense_vector_query_fields: [ - { field: 'dv_field1', model_id: 'dense_model', nested: false, indices: ['index1'] }, + { field: 'dv_field1', model_id: 'dense_model', indices: ['index1'] }, ], bm25_query_fields: [], source_fields: [], skipped_fields: 0, + semantic_fields: [], }, }; @@ -617,6 +774,7 @@ describe('create_query', () => { bm25_query_fields: ['title', 'text', 'content'], source_fields: [], skipped_fields: 0, + semantic_fields: [], }, }; @@ -633,6 +791,7 @@ describe('create_query', () => { bm25_query_fields: ['unknown1', 'unknown2'], source_fields: [], skipped_fields: 0, + semantic_fields: [], }, }; @@ -648,6 +807,7 @@ describe('create_query', () => { 'search-search-labs': { elser_query_fields: [], dense_vector_query_fields: [], + semantic_fields: [], bm25_query_fields: [ 'additional_urls', 'title', @@ -695,6 +855,7 @@ describe('create_query', () => { const fieldDescriptors: IndicesQuerySourceFields = { 'search-search-labs': { elser_query_fields: [], + semantic_fields: [], dense_vector_query_fields: [], bm25_query_fields: [], source_fields: [], @@ -713,6 +874,7 @@ describe('create_query', () => { const fieldDescriptors: IndicesQuerySourceFields = { 'search-search-labs': { elser_query_fields: [], + semantic_fields: [], dense_vector_query_fields: [], bm25_query_fields: [], source_fields: ['non_suggested_field'], diff --git a/x-pack/plugins/search_playground/public/utils/create_query.ts b/x-pack/plugins/search_playground/public/utils/create_query.ts index b2602a45707a8..0f65bea734d5c 100644 --- a/x-pack/plugins/search_playground/public/utils/create_query.ts +++ b/x-pack/plugins/search_playground/public/utils/create_query.ts @@ -46,6 +46,7 @@ interface ReRankOptions { export function createQuery( fields: IndexFields, + sourceFields: IndexFields, fieldDescriptors: IndicesQuerySourceFields, rerankOptions: ReRankOptions = { rrf: true, @@ -57,19 +58,83 @@ export function createQuery( const indexFields: string[] = fields[index]; const indexFieldDescriptors: QuerySourceFields = fieldDescriptors[index]; + const semanticMatches = indexFields.map((field) => { + const semanticField = indexFieldDescriptors.semantic_fields.find((x) => x.field === field); + const isSourceField = sourceFields[index].includes(field); + + // this is needed to get the inner_hits for the source field + // we cant rely on only the semantic field + // in future inner_hits option will be added to semantic + if (semanticField && isSourceField) { + if (semanticField.embeddingType === 'dense_vector') { + const filter = + semanticField.indices.length < indices.length + ? { filter: { terms: { _index: semanticField.indices } } } + : {}; + + return { + nested: { + path: `${semanticField.field}.inference.chunks`, + query: { + knn: { + field: `${semanticField.field}.inference.chunks.embeddings`, + ...filter, + query_vector_builder: { + text_embedding: { + model_id: semanticField.inferenceId, + model_text: '{query}', + }, + }, + }, + }, + inner_hits: { + size: 2, + _source: [`${semanticField.field}.inference.chunks.text`], + }, + }, + }; + } else if (semanticField.embeddingType === 'sparse_vector') { + return { + nested: { + path: `${semanticField.field}.inference.chunks`, + query: { + sparse_vector: { + inference_id: semanticField.inferenceId, + field: `${semanticField.field}.inference.chunks.embeddings`, + query: '{query}', + }, + }, + inner_hits: { + size: 2, + _source: [`${semanticField.field}.inference.chunks.text`], + }, + }, + }; + } + } else if (semanticField) { + return { + semantic: { + field: semanticField.field, + query: '{query}', + }, + }; + } else { + return null; + } + }); + const sparseMatches = indexFields.map((field) => { const elserField = indexFieldDescriptors.elser_query_fields.find( (x) => x.field === field ); - // not supporting nested fields for now - if (elserField && !elserField.nested) { + if (elserField) { // when another index has the same field, we don't want to duplicate the match rule const hasExistingSparseMatch = acc.queryMatches.find( - (x: any) => - x?.text_expansion?.[field] && - x?.text_expansion?.[field].model_id === elserField?.model_id + (x) => + x?.sparse_vector?.field === field && + x?.sparse_vector?.inference_id === elserField?.model_id ); if (hasExistingSparseMatch) { @@ -77,11 +142,10 @@ export function createQuery( } return { - text_expansion: { - [elserField.field]: { - model_id: elserField.model_id, - model_text: '{query}', - }, + sparse_vector: { + field: elserField.field, + inference_id: elserField.model_id, + query: '{query}', }, }; } @@ -108,8 +172,7 @@ export function createQuery( (x) => x.field === field ); - // not supporting nested fields for now - if (denseVectorField && !denseVectorField.nested) { + if (denseVectorField) { // when the knn field isn't found in all indices, we need a filter to ensure we only use the field from the correct index const filter = denseVectorField.indices.length < indices.length @@ -134,7 +197,7 @@ export function createQuery( }) .filter((x) => !!x); - const matches = [...sparseMatches, bm25Match].filter((x) => !!x); + const matches = [...sparseMatches, ...semanticMatches, bm25Match].filter((x) => !!x); return { queryMatches: [...acc.queryMatches, ...matches], @@ -222,6 +285,14 @@ export function getDefaultSourceFields(fieldDescriptors: IndicesQuerySourceField (acc: IndexFields, index: string) => { const indexFieldDescriptors = fieldDescriptors[index]; + // semantic_text fields are prioritized + if (indexFieldDescriptors.semantic_fields.length > 0) { + return { + ...acc, + [index]: indexFieldDescriptors.semantic_fields.map((x) => x.field), + }; + } + // if there are no source fields, we don't need to suggest anything if (indexFieldDescriptors.source_fields.length === 0) { return { @@ -253,7 +324,9 @@ export function getDefaultQueryFields(fieldDescriptors: IndicesQuerySourceFields const indexFieldDescriptors = fieldDescriptors[index]; const fields: string[] = []; - if (indexFieldDescriptors.elser_query_fields.length > 0) { + if (indexFieldDescriptors.semantic_fields.length > 0) { + fields.push(...indexFieldDescriptors.semantic_fields.map((x) => x.field)); + } else if (indexFieldDescriptors.elser_query_fields.length > 0) { const suggested = indexFieldDescriptors.elser_query_fields.filter((x) => SUGGESTED_SPARSE_FIELDS.includes(x.field) ); diff --git a/x-pack/plugins/search_playground/server/lib/conversational_chain.test.ts b/x-pack/plugins/search_playground/server/lib/conversational_chain.test.ts index ddc95d9dd4346..f7ad46d3fdfda 100644 --- a/x-pack/plugins/search_playground/server/lib/conversational_chain.test.ts +++ b/x-pack/plugins/search_playground/server/lib/conversational_chain.test.ts @@ -208,6 +208,57 @@ describe('conversational chain', () => { }); }, 10000); + it('should be able to create a conversational chain with inner hit field', async () => { + await createTestChain({ + responses: ['the final answer'], + chat: [ + { + id: '1', + role: 'user', + content: 'what is the work from home policy?', + }, + ], + expectedFinalAnswer: 'the final answer', + docs: [ + { + _index: 'index', + _id: '1', + inner_hits: { + 'field.inference.chunks': { + hits: { + hits: [ + { + _source: { + text: 'value', + }, + }, + ], + }, + }, + }, + }, + ], + expectedDocs: [ + { + documents: [{ metadata: { _id: '1', _index: 'index' }, pageContent: 'value' }], + type: 'retrieved_docs', + }, + ], + expectedTokens: [ + { type: 'context_token_count', count: 7 }, + { type: 'prompt_token_count', count: 20 }, + ], + expectedSearchRequest: [ + { + method: 'POST', + path: '/index,website/_search', + body: { query: { match: { field: 'what is the work from home policy?' } }, size: 3 }, + }, + ], + contentField: { index: 'field' }, + }); + }, 10000); + it('asking with chat history should re-write the question', async () => { await createTestChain({ responses: ['rewrite the question', 'the final answer'], diff --git a/x-pack/plugins/search_playground/server/lib/elasticsearch_retriever.ts b/x-pack/plugins/search_playground/server/lib/elasticsearch_retriever.ts index 4a2c3344b2934..8504651bb6398 100644 --- a/x-pack/plugins/search_playground/server/lib/elasticsearch_retriever.ts +++ b/x-pack/plugins/search_playground/server/lib/elasticsearch_retriever.ts @@ -89,7 +89,7 @@ export class ElasticsearchRetriever extends BaseRetriever { : this.content_field[hit._index as string]; // we need to iterate over the _source object to get the value of complex key definition such as metadata.source - const valueForSelectedField = getValueForSelectedField(hit._source, pageContentFieldKey); + const valueForSelectedField = getValueForSelectedField(hit, pageContentFieldKey); return new Document({ pageContent: valueForSelectedField, diff --git a/x-pack/plugins/search_playground/server/lib/fetch_query_source_fields.test.ts b/x-pack/plugins/search_playground/server/lib/fetch_query_source_fields.test.ts index 539a3deaa7c03..8991cb9924480 100644 --- a/x-pack/plugins/search_playground/server/lib/fetch_query_source_fields.test.ts +++ b/x-pack/plugins/search_playground/server/lib/fetch_query_source_fields.test.ts @@ -19,8 +19,13 @@ import { SPARSE_INPUT_OUTPUT_ONE_INDEX, SPARSE_INPUT_OUTPUT_ONE_INDEX_FIELD_CAPS, SPARSE_INPUT_OUTPUT_ONE_INDEX_FIELD_CAPS_MODEL_ID_KEYWORD, + SPARSE_SEMANTIC_FIELD_FIELD_CAPS, + SPARSE_SEMANTIC_FIELD_MAPPINGS, DENSE_SPARSE_SAME_FIELD_NAME_CAPS, DENSE_SPARSE_SAME_FIELD_NAME_DOCS, + DENSE_SEMANTIC_FIELD_MAPPINGS, + DENSE_SEMANTIC_FIELD_FIELD_CAPS, + DENSE_SEMANTIC_FIELD_MAPPINGS_MISSING_TASK_TYPE, } from '../../__mocks__/fetch_query_source_fields.mock'; import { fetchFields, @@ -36,10 +41,20 @@ describe('fetch_query_source_fields', () => { { index: 'workplace_index', doc: ELSER_PASSAGE_CHUNKED_TWO_INDICES_DOCS[0], + mapping: { + workplace_index: { + mappings: {}, + }, + }, }, { index: 'workplace_index2', doc: ELSER_PASSAGE_CHUNKED_TWO_INDICES_DOCS[1], + mapping: { + workplace_index2: { + mappings: {}, + }, + }, }, ]) ).toEqual({ @@ -55,14 +70,15 @@ describe('fetch_query_source_fields', () => { { field: 'vector.tokens', model_id: '.elser_model_2', - nested: false, indices: ['workplace_index'], }, ], skipped_fields: 8, source_fields: ['metadata.summary', 'metadata.rolePermissions', 'text', 'metadata.name'], + semantic_fields: [], }, workplace_index2: { + semantic_fields: [], bm25_query_fields: [ 'metadata.summary', 'content', @@ -75,7 +91,6 @@ describe('fetch_query_source_fields', () => { { field: 'content_vector.tokens', model_id: '.elser_model_2', - nested: false, indices: ['workplace_index2'], }, ], @@ -95,10 +110,16 @@ describe('fetch_query_source_fields', () => { { index: 'search-example-main', doc: DENSE_PASSAGE_FIRST_SINGLE_INDEX_DOC, + mapping: { + 'search-example-main': { + mappings: {}, + }, + }, }, ]) ).toEqual({ 'search-example-main': { + semantic_fields: [], bm25_query_fields: [ 'page_content_key', 'title', @@ -118,7 +139,6 @@ describe('fetch_query_source_fields', () => { { field: 'page_content_e5_embbeding.predicted_value', model_id: '.multilingual-e5-small_linux-x86_64', - nested: false, indices: ['search-example-main'], }, ], @@ -149,18 +169,23 @@ describe('fetch_query_source_fields', () => { { index: 'search-nethys', doc: SPARSE_DOC_SINGLE_INDEX, + mapping: { + 'search-nethys': { + mappings: {}, + }, + }, }, ]) ).toEqual({ 'search-nethys': { bm25_query_fields: ['body_content', 'headings', 'title'], dense_vector_query_fields: [], + semantic_fields: [], elser_query_fields: [ { field: 'ml.inference.body_content_expanded.predicted_value', indices: ['search-nethys'], model_id: '.elser_model_2_linux-x86_64', - nested: false, }, ], source_fields: ['body_content', 'headings', 'title'], @@ -176,6 +201,11 @@ describe('fetch_query_source_fields', () => { { index: 'workplace_index_nested', doc: DENSE_VECTOR_DOCUMENT_FIRST[0], + mapping: { + workplace_index_nested: { + mappings: {}, + }, + }, }, ]) ).toEqual({ @@ -192,6 +222,7 @@ describe('fetch_query_source_fields', () => { ], dense_vector_query_fields: [], elser_query_fields: [], + semantic_fields: [], source_fields: [ 'metadata.category', 'content', @@ -213,6 +244,11 @@ describe('fetch_query_source_fields', () => { { index: 'index2', doc: DENSE_INPUT_OUTPUT_ONE_INDEX[0], + mapping: { + index2: { + mappings: {}, + }, + }, }, ]) ).toEqual({ @@ -223,10 +259,10 @@ describe('fetch_query_source_fields', () => { field: 'text_embedding', indices: ['index2'], model_id: '.multilingual-e5-small', - nested: false, }, ], elser_query_fields: [], + semantic_fields: [], source_fields: ['text'], skipped_fields: 2, }, @@ -239,6 +275,11 @@ describe('fetch_query_source_fields', () => { { index: 'index', doc: SPARSE_INPUT_OUTPUT_ONE_INDEX[0], + mapping: { + index: { + mappings: {}, + }, + }, }, ]) ).toEqual({ @@ -249,10 +290,10 @@ describe('fetch_query_source_fields', () => { field: 'text_embedding', indices: ['index'], model_id: '.elser_model_2', - nested: false, }, ], dense_vector_query_fields: [], + semantic_fields: [], source_fields: ['text'], skipped_fields: 2, }, @@ -265,10 +306,20 @@ describe('fetch_query_source_fields', () => { { index: 'cohere-embeddings', doc: DENSE_SPARSE_SAME_FIELD_NAME_DOCS[0], + mapping: { + 'cohere-embeddings': { + mappings: {}, + }, + }, }, { index: 'elser_index', doc: DENSE_SPARSE_SAME_FIELD_NAME_DOCS[1], + mapping: { + elser_index: { + mappings: {}, + }, + }, }, ]) ).toEqual({ @@ -279,12 +330,12 @@ describe('fetch_query_source_fields', () => { field: 'text_embedding', indices: ['cohere-embeddings'], model_id: 'cohere_embeddings', - nested: false, }, ], elser_query_fields: [], skipped_fields: 2, source_fields: ['text'], + semantic_fields: [], }, elser_index: { bm25_query_fields: ['text'], @@ -294,14 +345,96 @@ describe('fetch_query_source_fields', () => { field: 'text_embedding', indices: ['elser_index'], model_id: 'my-elser-model', - nested: false, }, ], skipped_fields: 2, source_fields: ['text'], + semantic_fields: [], }, }); }); + + describe('semantic text support', () => { + it('should return the correct fields for semantic text - sparse', () => { + expect( + parseFieldsCapabilities(SPARSE_SEMANTIC_FIELD_FIELD_CAPS, [ + { + index: 'test-index2', + // unused + doc: SPARSE_INPUT_OUTPUT_ONE_INDEX[0], + mapping: SPARSE_SEMANTIC_FIELD_MAPPINGS, + }, + ]) + ).toEqual({ + 'test-index2': { + bm25_query_fields: ['non_infer_field'], + dense_vector_query_fields: [], + elser_query_fields: [], + semantic_fields: [ + { + embeddingType: 'sparse_vector', + field: 'infer_field', + inferenceId: 'elser-endpoint', + indices: ['test-index2'], + }, + ], + skipped_fields: 4, + source_fields: ['infer_field', 'non_infer_field'], + }, + }); + }); + + it('should return the correct fields for semantic text - dense', () => { + expect( + parseFieldsCapabilities(DENSE_SEMANTIC_FIELD_FIELD_CAPS, [ + { + index: 'test-index2', + // unused + doc: DENSE_INPUT_OUTPUT_ONE_INDEX[0], + mapping: DENSE_SEMANTIC_FIELD_MAPPINGS, + }, + ]) + ).toEqual({ + 'test-index2': { + bm25_query_fields: ['non_infer_field'], + dense_vector_query_fields: [], + elser_query_fields: [], + semantic_fields: [ + { + embeddingType: 'dense_vector', + field: 'infer_field', + inferenceId: 'cohere', + indices: ['test-index2'], + }, + ], + skipped_fields: 4, + source_fields: ['infer_field', 'non_infer_field'], + }, + }); + }); + + it('skips if the semantic_text field not setup correctly', () => { + expect( + parseFieldsCapabilities(DENSE_SEMANTIC_FIELD_FIELD_CAPS, [ + { + index: 'test-index2', + // unused + doc: DENSE_INPUT_OUTPUT_ONE_INDEX[0], + mapping: DENSE_SEMANTIC_FIELD_MAPPINGS_MISSING_TASK_TYPE, + }, + ]) + ).toEqual({ + 'test-index2': { + bm25_query_fields: ['non_infer_field'], + dense_vector_query_fields: [], + elser_query_fields: [], + semantic_fields: [], + skipped_fields: 5, // increat by 1 for the semantic field + source_fields: ['non_infer_field'], + }, + }); + }); + }); }); describe('getModelIdFields', () => { @@ -340,6 +473,13 @@ describe('fetch_query_source_fields', () => { asCurrentUser: { fieldCaps: jest.fn().mockResolvedValue(DENSE_PASSAGE_FIRST_SINGLE_INDEX_FIELD_CAPS), search: jest.fn().mockResolvedValue(DENSE_PASSAGE_FIRST_SINGLE_INDEX_DOC), + indices: { + getMapping: jest.fn().mockResolvedValue({ + 'search-example-main': { + mappings: {}, + }, + }), + }, }, } as any; const indices = ['search-example-main']; @@ -371,6 +511,13 @@ describe('fetch_query_source_fields', () => { asCurrentUser: { fieldCaps: jest.fn().mockResolvedValue(SPARSE_INPUT_OUTPUT_ONE_INDEX_FIELD_CAPS), search: jest.fn().mockResolvedValue(SPARSE_INPUT_OUTPUT_ONE_INDEX), + indices: { + getMapping: jest.fn().mockResolvedValue({ + index: { + mappings: {}, + }, + }), + }, }, } as any; const indices = ['index']; diff --git a/x-pack/plugins/search_playground/server/lib/fetch_query_source_fields.ts b/x-pack/plugins/search_playground/server/lib/fetch_query_source_fields.ts index 82db1b8de6df4..15e1ead0bf037 100644 --- a/x-pack/plugins/search_playground/server/lib/fetch_query_source_fields.ts +++ b/x-pack/plugins/search_playground/server/lib/fetch_query_source_fields.ts @@ -5,8 +5,14 @@ * 2.0. */ -import { SearchResponse, FieldCapsResponse } from '@elastic/elasticsearch/lib/api/types'; -import { FieldCapsFieldCapability } from '@elastic/elasticsearch/lib/api/typesWithBodyKey'; +import { + SearchResponse, + FieldCapsResponse, + IndicesGetMappingResponse, + FieldCapsFieldCapability, + MappingPropertyBase, +} from '@elastic/elasticsearch/lib/api/types'; + import { IScopedClusterClient } from '@kbn/core-elasticsearch-server'; import { IndicesQuerySourceFields } from '../types'; @@ -15,11 +21,35 @@ interface FieldModelId { modelId: string | undefined; } +type SemanticEmbeddingType = 'sparse_vector' | 'dense_vector'; + +interface SemanticField { + field: string; + inferenceId: string; + embeddingType?: SemanticEmbeddingType; +} + interface IndexFieldModel { index: string; fields: FieldModelId[]; + semanticTextFields: SemanticField[]; } +type TaskType = 'sparse_embedding' | 'text_embedding'; + +interface MappingSemanticTextProperty extends MappingPropertyBase { + type: 'semantic_text'; + inference_id: string; + model_settings?: { + task_type: TaskType; + }; +} + +const EMBEDDING_TYPE: Record = { + sparse_embedding: 'sparse_vector', + text_embedding: 'dense_vector', +}; + export const getModelIdFields = (fieldCapsResponse: FieldCapsResponse) => { const { fields } = fieldCapsResponse; return Object.keys(fields).reduce>((acc, fieldKey) => { @@ -64,7 +94,7 @@ export const fetchFields = async ( const modelIdFields = getModelIdFields(fieldCapabilities); - const indicesAggs = await Promise.all( + const indicesAggsMappings = await Promise.all( indices.map(async (index) => ({ index, doc: await client.asCurrentUser.search({ @@ -85,14 +115,19 @@ export const fetchFields = async ( ), }, }), + mapping: await client.asCurrentUser.indices.getMapping({ index }), })) ); - return parseFieldsCapabilities(fieldCapabilities, indicesAggs); + return parseFieldsCapabilities(fieldCapabilities, indicesAggsMappings); }; const INFERENCE_MODEL_FIELD_REGEXP = /\.predicted_value|\.tokens/; +const getSemanticField = (field: string, semanticFields: SemanticField[]) => { + return semanticFields.find((sf) => sf.field === field); +}; + const getModelField = (field: string, modelIdFields: FieldModelId[]) => { // For input_output inferred fields, the model_id is at the top level const topLevelModelField = modelIdFields.find( @@ -141,12 +176,12 @@ const isFieldInIndex = ( export const parseFieldsCapabilities = ( fieldCapsResponse: FieldCapsResponse, - aggDocs: Array<{ index: string; doc: SearchResponse }> + aggMappingDocs: Array<{ index: string; doc: SearchResponse; mapping: IndicesGetMappingResponse }> ): IndicesQuerySourceFields => { const { fields, indices: indexOrIndices } = fieldCapsResponse; const indices = Array.isArray(indexOrIndices) ? indexOrIndices : [indexOrIndices]; - const indexModelIdFields = aggDocs.map((aggDoc) => { + const indexModelIdFields = aggMappingDocs.map((aggDoc) => { const modelIdFields = Object.keys(aggDoc.doc.aggregations || {}).map((field) => { return { field, @@ -154,9 +189,28 @@ export const parseFieldsCapabilities = ( }; }); + const mappingProperties = aggDoc.mapping[aggDoc.index].mappings.properties || {}; + + const semanticTextFields: SemanticField[] = Object.keys(mappingProperties || {}) + .filter( + // @ts-ignore + (field) => mappingProperties[field].type === 'semantic_text' + ) + .map((field) => { + const mapping = mappingProperties[field] as unknown as MappingSemanticTextProperty; + return { + field, + inferenceId: mapping?.inference_id, + embeddingType: mapping?.model_settings?.task_type + ? EMBEDDING_TYPE[mapping.model_settings.task_type] + : undefined, + }; + }); + return { index: aggDoc.index, fields: modelIdFields, + semanticTextFields, }; }); @@ -167,6 +221,7 @@ export const parseFieldsCapabilities = ( bm25_query_fields: [], source_fields: [], skipped_fields: 0, + semantic_fields: [], }; return acc; }, {}); @@ -186,15 +241,38 @@ export const parseFieldsCapabilities = ( : (indices as unknown as string[]); for (const index of indicesPresentIn) { - const modelIdFields = indexModelIdFields.find( + const { fields: modelIdFields, semanticTextFields } = indexModelIdFields.find( (indexModelIdField) => indexModelIdField.index === index - )!.fields; + )!; + const nestedField = isFieldNested(fieldKey, fieldCapsResponse); + + if (isFieldInIndex(field, 'semantic_text', index)) { + const semanticFieldMapping = getSemanticField(fieldKey, semanticTextFields); - if ( + // only use this when embeddingType and inferenceId is defined + // this requires semantic_text field to be set up correctly and ingested + if ( + semanticFieldMapping && + semanticFieldMapping.embeddingType && + semanticFieldMapping.inferenceId && + !nestedField + ) { + const semanticField = { + field: fieldKey, + inferenceId: semanticFieldMapping.inferenceId, + embeddingType: semanticFieldMapping.embeddingType, + indices: (field.semantic_text.indices as string[]) || indicesPresentIn, + }; + + acc[index].semantic_fields.push(semanticField); + acc[index].source_fields.push(fieldKey); + } else { + acc[index].skipped_fields++; + } + } else if ( isFieldInIndex(field, 'rank_features', index) || isFieldInIndex(field, 'sparse_vector', index) ) { - const nestedField = isFieldNested(fieldKey, fieldCapsResponse); const modelId = getModelField(fieldKey, modelIdFields); const fieldCapabilities = field.rank_features || field.sparse_vector; @@ -205,7 +283,6 @@ export const parseFieldsCapabilities = ( const elserModelField = { field: fieldKey, model_id: modelId, - nested: !!isFieldNested(fieldKey, fieldCapsResponse), indices: (fieldCapabilities.indices as string[]) || indicesPresentIn, }; acc[index].elser_query_fields.push(elserModelField); @@ -213,7 +290,6 @@ export const parseFieldsCapabilities = ( acc[index].skipped_fields++; } } else if (isFieldInIndex(field, 'dense_vector', index)) { - const nestedField = isFieldNested(fieldKey, fieldCapsResponse); const modelId = getModelField(fieldKey, modelIdFields); const fieldCapabilities = field.dense_vector; @@ -224,7 +300,6 @@ export const parseFieldsCapabilities = ( const denseVectorField = { field: fieldKey, model_id: modelId, - nested: !!nestedField, indices: (fieldCapabilities.indices as string[]) || indicesPresentIn, }; acc[index].dense_vector_query_fields.push(denseVectorField); diff --git a/x-pack/plugins/search_playground/server/utils/get_value_for_selected_field.test.ts b/x-pack/plugins/search_playground/server/utils/get_value_for_selected_field.test.ts index a13e51603cc7b..bf02f6620d38c 100644 --- a/x-pack/plugins/search_playground/server/utils/get_value_for_selected_field.test.ts +++ b/x-pack/plugins/search_playground/server/utils/get_value_for_selected_field.test.ts @@ -22,7 +22,7 @@ describe('getValueForSelectedField', () => { }, }; - expect(getValueForSelectedField(hit._source, 'test')).toEqual('The Shawshank Redemption'); + expect(getValueForSelectedField(hit, 'test')).toEqual('The Shawshank Redemption'); }); test('should return for combined key', () => { @@ -39,7 +39,7 @@ describe('getValueForSelectedField', () => { }, }; - expect(getValueForSelectedField(hit._source, 'metadata.source')).toEqual( + expect(getValueForSelectedField(hit, 'metadata.source')).toEqual( 'Over the course of several years, two convicts form a friendship, seeking consolation and, eventually, redemption through basic compassion' ); }); @@ -58,7 +58,7 @@ describe('getValueForSelectedField', () => { }, }; - expect(getValueForSelectedField(hit._source, 'metadata.sources')).toBe(''); + expect(getValueForSelectedField(hit, 'metadata.sources')).toBe(''); }); test('should return empty string for nested key', () => { @@ -75,6 +75,52 @@ describe('getValueForSelectedField', () => { }, }; - expect(getValueForSelectedField(hit._source, 'bla.sources')).toBe(''); + expect(getValueForSelectedField(hit, 'bla.sources')).toBe(''); + }); + + test('should return when its a chunked passage', () => { + const hit = { + _index: 'sample-index', + _id: '8jSNY48B6iHEi98DL1C-', + _score: 0.7789394, + _source: { + test: 'The Shawshank Redemption', + metadata: { + source: + 'Over the course of several years, two convicts form a friendship, seeking consolation and, eventually, redemption through basic compassion', + }, + }, + inner_hits: { + 'test.inference.chunks': { + hits: { + hits: [ + { + _source: { + text: 'Over the course of several years', + }, + }, + { + _source: { + text: 'two convicts form a friendship', + }, + }, + { + _source: { + text: 'seeking consolation and, eventually, redemption through basic compassion', + }, + }, + ], + }, + }, + }, + }; + + expect(getValueForSelectedField(hit as any, 'test')).toMatchInlineSnapshot(` + "Over the course of several years + --- + two convicts form a friendship + --- + seeking consolation and, eventually, redemption through basic compassion" + `); }); }); diff --git a/x-pack/plugins/search_playground/server/utils/get_value_for_selected_field.ts b/x-pack/plugins/search_playground/server/utils/get_value_for_selected_field.ts index 6a2044f2943e4..68bd600d62143 100644 --- a/x-pack/plugins/search_playground/server/utils/get_value_for_selected_field.ts +++ b/x-pack/plugins/search_playground/server/utils/get_value_for_selected_field.ts @@ -5,8 +5,20 @@ * 2.0. */ +import { SearchHit } from '@elastic/elasticsearch/lib/api/types'; import { get } from 'lodash'; -export const getValueForSelectedField = (source: unknown, path: string): string => { - return get(source, path, ''); +export const getValueForSelectedField = (hit: SearchHit, path: string): string => { + if (!hit) { + return ''; + } + + // for semantic_text matches + if (!!hit.inner_hits?.[`${path}.inference.chunks`]) { + return hit.inner_hits[`${path}.inference.chunks`].hits.hits + .map((innerHit) => innerHit._source.text) + .join('\n --- \n'); + } + + return get(hit._source, path, ''); };