-
Notifications
You must be signed in to change notification settings - Fork 9
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Support basic semantic search with preset workflow template #121
Changes from 10 commits
c90f721
2bb403a
ad260ee
691695a
e7e30b7
1bcfa43
1433839
95a4df7
400a4dd
854b4da
4c109fe
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,6 +4,7 @@ | |
*/ | ||
|
||
import moment from 'moment'; | ||
import { MarkerType } from 'reactflow'; | ||
import { | ||
WorkspaceFlowState, | ||
ReactFlowComponent, | ||
|
@@ -17,21 +18,88 @@ import { | |
DATE_FORMAT_PATTERN, | ||
COMPONENT_CATEGORY, | ||
NODE_CATEGORY, | ||
WorkspaceFormValues, | ||
} from './'; | ||
|
||
// TODO: implement this and remove hardcoded return values | ||
/** | ||
* Converts a ReactFlow workspace flow to a backend-compatible set of ingest and/or search sub-workflows, | ||
* along with a provision sub-workflow if resources are to be created. | ||
* Given a ReactFlow workspace flow and the set of current form values within such flow, | ||
* generate a backend-compatible set of sub-workflows. | ||
* | ||
*/ | ||
export function toTemplateFlows( | ||
workspaceFlow: WorkspaceFlowState | ||
workspaceFlow: WorkspaceFlowState, | ||
formValues: WorkspaceFormValues | ||
): TemplateFlows { | ||
const textEmbeddingTransformerNodeId = Object.keys(formValues).find((key) => | ||
key.includes('text_embedding') | ||
) as string; | ||
const knnIndexerNodeId = Object.keys(formValues).find((key) => | ||
key.includes('knn') | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Same as above. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Same as above :) |
||
) as string; | ||
const textEmbeddingFields = formValues[textEmbeddingTransformerNodeId]; | ||
const knnIndexerFields = formValues[knnIndexerNodeId]; | ||
|
||
return { | ||
provision: { | ||
user_params: {} as Map<string, any>, | ||
nodes: [], | ||
edges: [], | ||
nodes: [ | ||
{ | ||
id: 'create_ingest_pipeline', | ||
type: 'create_ingest_pipeline', | ||
user_inputs: { | ||
pipeline_id: 'test-pipeline', | ||
model_id: textEmbeddingFields['modelId'], | ||
input_field: textEmbeddingFields['inputField'], | ||
output_field: textEmbeddingFields['vectorField'], | ||
configurations: { | ||
description: 'A text embedding ingest pipeline', | ||
processors: [ | ||
{ | ||
text_embedding: { | ||
model_id: textEmbeddingFields['modelId'], | ||
field_map: { | ||
[textEmbeddingFields['inputField']]: | ||
textEmbeddingFields['vectorField'], | ||
}, | ||
}, | ||
}, | ||
], | ||
}, | ||
}, | ||
}, | ||
{ | ||
id: 'create_index', | ||
type: 'create_index', | ||
previous_node_inputs: { | ||
create_ingest_pipeline: 'pipeline_id', | ||
}, | ||
user_inputs: { | ||
index_name: knnIndexerFields['indexName'], | ||
configurations: { | ||
settings: { | ||
default_pipeline: '${{create_ingest_pipeline.pipeline_id}}', | ||
}, | ||
mappings: { | ||
properties: { | ||
[textEmbeddingFields['vectorField']]: { | ||
type: 'knn_vector', | ||
dimension: 768, | ||
method: { | ||
engine: 'lucene', | ||
space_type: 'l2', | ||
name: 'hnsw', | ||
parameters: {}, | ||
}, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why do we have static values defined here? Aren't we taking these inputs from the user, or are there future plans to do so? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Same as above. As a reminder from the description, this is all hardcoded right now for the purpose of making an initial use case possible. |
||
}, | ||
[textEmbeddingFields['inputField']]: { | ||
type: 'text', | ||
}, | ||
}, | ||
}, | ||
}, | ||
}, | ||
}, | ||
], | ||
}, | ||
}; | ||
} | ||
|
@@ -47,10 +115,8 @@ export function toWorkspaceFlow( | |
const ingestId1 = generateId('text_embedding_processor'); | ||
const ingestId2 = generateId('knn_index'); | ||
const ingestGroupId = generateId(COMPONENT_CATEGORY.INGEST); | ||
|
||
const searchId1 = generateId('text_embedding_processor'); | ||
const searchId2 = generateId('knn_index'); | ||
const searchGroupId = generateId(COMPONENT_CATEGORY.SEARCH); | ||
const edgeId = generateId('edge'); | ||
|
||
const ingestNodes = [ | ||
{ | ||
|
@@ -61,11 +127,10 @@ export function toWorkspaceFlow( | |
style: { | ||
width: 900, | ||
height: 400, | ||
overflowX: 'auto', | ||
overflowY: 'auto', | ||
}, | ||
className: 'reactflow__group-node__ingest', | ||
selectable: true, | ||
deletable: false, | ||
}, | ||
{ | ||
id: ingestId1, | ||
|
@@ -78,6 +143,7 @@ export function toWorkspaceFlow( | |
parentNode: ingestGroupId, | ||
extent: 'parent', | ||
draggable: true, | ||
deletable: false, | ||
}, | ||
{ | ||
id: ingestId2, | ||
|
@@ -87,6 +153,7 @@ export function toWorkspaceFlow( | |
parentNode: ingestGroupId, | ||
extent: 'parent', | ||
draggable: true, | ||
deletable: false, | ||
}, | ||
] as ReactFlowComponent[]; | ||
|
||
|
@@ -99,38 +166,30 @@ export function toWorkspaceFlow( | |
style: { | ||
width: 900, | ||
height: 400, | ||
overflowX: 'auto', | ||
overflowY: 'auto', | ||
}, | ||
className: 'reactflow__group-node__search', | ||
selectable: true, | ||
}, | ||
{ | ||
id: searchId1, | ||
position: { x: 100, y: 70 }, | ||
data: initComponentData( | ||
new TextEmbeddingTransformer().toObj(), | ||
searchId1 | ||
), | ||
type: NODE_CATEGORY.CUSTOM, | ||
parentNode: searchGroupId, | ||
extent: 'parent', | ||
draggable: true, | ||
}, | ||
{ | ||
id: searchId2, | ||
position: { x: 500, y: 70 }, | ||
data: initComponentData(new KnnIndexer().toObj(), searchId2), | ||
type: NODE_CATEGORY.CUSTOM, | ||
parentNode: searchGroupId, | ||
extent: 'parent', | ||
draggable: true, | ||
deletable: false, | ||
}, | ||
] as ReactFlowComponent[]; | ||
|
||
return { | ||
nodes: [...ingestNodes, ...searchNodes], | ||
edges: [] as ReactFlowEdge[], | ||
edges: [ | ||
{ | ||
id: edgeId, | ||
key: edgeId, | ||
source: ingestId1, | ||
target: ingestId2, | ||
markerEnd: { | ||
type: MarkerType.ArrowClosed, | ||
width: 20, | ||
height: 20, | ||
}, | ||
zIndex: 2, | ||
deletable: false, | ||
}, | ||
] as ReactFlowEdge[], | ||
}; | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Constant for `text_embedding`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, this entire function will be made more generic. This is just a first set of hardcoded values to get the end-to-end flow working.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
See TODO on line 24.