From 4f968fbe6dc2c3ccb7652d8b22e3922e71bd7ff9 Mon Sep 17 00:00:00 2001 From: Daishan Peng Date: Fri, 11 Oct 2024 09:45:52 -0700 Subject: [PATCH] Enhance: add more states to integration, control ingestion (#163) Signed-off-by: Daishan Peng --- apiclient/types/knowledge.go | 1 + apiclient/types/remoteKnowledgeSource.go | 26 +++-- apiclient/types/zz_generated.deepcopy.go | 29 ++++++ pkg/api/handlers/remoteknowledgesource.go | 24 ++++- .../handlers/uploads/remoteknowledgesource.go | 30 +++--- .../openapi/generated/openapi_generated.go | 99 ++++++++++++++++++- 6 files changed, 183 insertions(+), 26 deletions(-) diff --git a/apiclient/types/knowledge.go b/apiclient/types/knowledge.go index a3044382..562f22d7 100644 --- a/apiclient/types/knowledge.go +++ b/apiclient/types/knowledge.go @@ -17,6 +17,7 @@ type FileDetails struct { FilePath string `json:"filePath,omitempty"` URL string `json:"url,omitempty"` UpdatedAt string `json:"updatedAt,omitempty"` + Checksum string `json:"checksum,omitempty"` } type IngestionStatus struct { diff --git a/apiclient/types/remoteKnowledgeSource.go b/apiclient/types/remoteKnowledgeSource.go index aedbe4f1..7e0a9704 100644 --- a/apiclient/types/remoteKnowledgeSource.go +++ b/apiclient/types/remoteKnowledgeSource.go @@ -28,11 +28,12 @@ type RemoteKnowledgeSourceManifest struct { type RemoteKnowledgeSourceList List[RemoteKnowledgeSource] type RemoteKnowledgeSourceInput struct { - SourceType RemoteKnowledgeSourceType `json:"sourceType,omitempty"` - Exclude []string `json:"exclude,omitempty"` - OneDriveConfig *OneDriveConfig `json:"onedriveConfig,omitempty"` - NotionConfig *NotionConfig `json:"notionConfig,omitempty"` - WebsiteCrawlingConfig *WebsiteCrawlingConfig `json:"websiteCrawlingConfig,omitempty"` + DisableIngestionAfterSync bool `json:"disableIngestionAfterSync,omitempty"` + SourceType RemoteKnowledgeSourceType `json:"sourceType,omitempty"` + Exclude []string `json:"exclude,omitempty"` + OneDriveConfig *OneDriveConfig `json:"onedriveConfig,omitempty"` + NotionConfig *NotionConfig `json:"notionConfig,omitempty"` + WebsiteCrawlingConfig *WebsiteCrawlingConfig `json:"websiteCrawlingConfig,omitempty"` } type OneDriveConfig struct { @@ -54,7 +55,14 @@ type RemoteKnowledgeSourceState struct { } type OneDriveLinksConnectorState struct { - Folders FolderSet `json:"folders,omitempty"` + Folders FolderSet `json:"folders,omitempty"` + Files map[string]FileState `json:"files,omitempty"` +} + +type FileState struct { + FileName string `json:"fileName,omitempty"` + FolderPath string `json:"folderPath,omitempty"` + URL string `json:"url,omitempty"` } type NotionConnectorState struct { @@ -62,11 +70,13 @@ type NotionConnectorState struct { } type NotionPage struct { - URL string `json:"url,omitempty"` - Title string `json:"title,omitempty"` + URL string `json:"url,omitempty"` + Title string `json:"title,omitempty"` + FolderPath string `json:"folderPath,omitempty"` } type WebsiteCrawlingConnectorState struct { ScrapeJobIds map[string]string `json:"scrapeJobIds"` Folders FolderSet `json:"folders"` + Pages map[string]Item `json:"pages"` } diff --git a/apiclient/types/zz_generated.deepcopy.go b/apiclient/types/zz_generated.deepcopy.go index 31c8c2b3..f150227c 100644 --- a/apiclient/types/zz_generated.deepcopy.go +++ b/apiclient/types/zz_generated.deepcopy.go @@ -296,6 +296,21 @@ func (in *FileList) DeepCopy() *FileList { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *FileState) DeepCopyInto(out *FileState) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FileState. +func (in *FileState) DeepCopy() *FileState { + if in == nil { + return nil + } + out := new(FileState) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in FolderSet) DeepCopyInto(out *FolderSet) { { @@ -607,6 +622,13 @@ func (in *OneDriveLinksConnectorState) DeepCopyInto(out *OneDriveLinksConnectorS (*out)[key] = val } } + if in.Files != nil { + in, out := &in.Files, &out.Files + *out = make(map[string]FileState, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new OneDriveLinksConnectorState. @@ -1300,6 +1322,13 @@ func (in *WebsiteCrawlingConnectorState) DeepCopyInto(out *WebsiteCrawlingConnec (*out)[key] = val } } + if in.Pages != nil { + in, out := &in.Pages, &out.Pages + *out = make(map[string]Item, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new WebsiteCrawlingConnectorState. diff --git a/pkg/api/handlers/remoteknowledgesource.go b/pkg/api/handlers/remoteknowledgesource.go index 6d239aca..ec12183c 100644 --- a/pkg/api/handlers/remoteknowledgesource.go +++ b/pkg/api/handlers/remoteknowledgesource.go @@ -3,6 +3,7 @@ package handlers import ( "fmt" "net/http" + "reflect" "github.com/otto8-ai/otto8/apiclient/types" "github.com/otto8-ai/otto8/pkg/api" @@ -71,14 +72,17 @@ func updateRemoteKnowledgeSource(req api.Context, linksID, parentName string, pa return fmt.Errorf("failed to decode request body: %w", err) } + configChanged := checkConfigChanged(input, remoteKnowledgeSource) remoteKnowledgeSource.Spec.RemoteKnowledgeSourceInput = input if err := req.Update(&remoteKnowledgeSource); err != nil { return fmt.Errorf("failed to update RemoteKnowledgeSource: %w", err) } - if err := createSyncRequest(req, remoteKnowledgeSource); err != nil { - return fmt.Errorf("failed to create sync request: %w", err) + if configChanged { + if err := createSyncRequest(req, remoteKnowledgeSource); err != nil { + return fmt.Errorf("failed to create sync request: %w", err) + } } req.WriteHeader(http.StatusNoContent) @@ -179,3 +183,19 @@ func convertRemoteKnowledgeSource(remoteKnowledgeSource v1.RemoteKnowledgeSource Error: remoteKnowledgeSource.Status.Error, } } + +func checkConfigChanged(input types.RemoteKnowledgeSourceInput, remoteKnowledgeSource v1.RemoteKnowledgeSource) bool { + if input.OneDriveConfig != nil && remoteKnowledgeSource.Spec.OneDriveConfig != nil { + return !reflect.DeepEqual(*input.OneDriveConfig, *remoteKnowledgeSource.Spec.OneDriveConfig) + } + + if input.NotionConfig != nil && remoteKnowledgeSource.Spec.NotionConfig != nil { + return !reflect.DeepEqual(*input.NotionConfig, *remoteKnowledgeSource.Spec.NotionConfig) + } + + if input.WebsiteCrawlingConfig != nil && remoteKnowledgeSource.Spec.WebsiteCrawlingConfig != nil { + return !reflect.DeepEqual(*input.WebsiteCrawlingConfig, *remoteKnowledgeSource.Spec.WebsiteCrawlingConfig) + } + + return true +} diff --git a/pkg/controller/handlers/uploads/remoteknowledgesource.go b/pkg/controller/handlers/uploads/remoteknowledgesource.go index 5a933fab..20cd74a4 100644 --- a/pkg/controller/handlers/uploads/remoteknowledgesource.go +++ b/pkg/controller/handlers/uploads/remoteknowledgesource.go @@ -307,22 +307,24 @@ func (u *UploadHandler) HandleUploadRun(req router.Request, resp router.Response remoteKnowledgeSource.Status.RunName = "" // Create object to re-ingest knowledge - resp.Objects( - &v1.IngestKnowledgeRequest{ - ObjectMeta: metav1.ObjectMeta{ - GenerateName: system.IngestRequestPrefix, - Namespace: req.Namespace, - Annotations: map[string]string{ - // Don't prune because the cleanup handler will do that. - apply.AnnotationPrune: "false", + if !remoteKnowledgeSource.Spec.DisableIngestionAfterSync { + resp.Objects( + &v1.IngestKnowledgeRequest{ + ObjectMeta: metav1.ObjectMeta{ + GenerateName: system.IngestRequestPrefix, + Namespace: req.Namespace, + Annotations: map[string]string{ + // Don't prune because the cleanup handler will do that. + apply.AnnotationPrune: "false", + }, + }, + Spec: v1.IngestKnowledgeRequestSpec{ + WorkspaceName: ws.Name, + HasKnowledge: len(fileMetadata) > 0, }, }, - Spec: v1.IngestKnowledgeRequestSpec{ - WorkspaceName: ws.Name, - HasKnowledge: len(fileMetadata) > 0, - }, - }, - ) + ) + } return nil } diff --git a/pkg/storage/openapi/generated/openapi_generated.go b/pkg/storage/openapi/generated/openapi_generated.go index 52fd1790..5624b40b 100644 --- a/pkg/storage/openapi/generated/openapi_generated.go +++ b/pkg/storage/openapi/generated/openapi_generated.go @@ -31,6 +31,7 @@ func GetOpenAPIDefinitions(ref common.ReferenceCallback) map[string]common.OpenA "github.com/otto8-ai/otto8/apiclient/types.File": schema_otto8_ai_otto8_apiclient_types_File(ref), "github.com/otto8-ai/otto8/apiclient/types.FileDetails": schema_otto8_ai_otto8_apiclient_types_FileDetails(ref), "github.com/otto8-ai/otto8/apiclient/types.FileList": schema_otto8_ai_otto8_apiclient_types_FileList(ref), + "github.com/otto8-ai/otto8/apiclient/types.FileState": schema_otto8_ai_otto8_apiclient_types_FileState(ref), "github.com/otto8-ai/otto8/apiclient/types.If": schema_otto8_ai_otto8_apiclient_types_If(ref), "github.com/otto8-ai/otto8/apiclient/types.IngestionStatus": schema_otto8_ai_otto8_apiclient_types_IngestionStatus(ref), "github.com/otto8-ai/otto8/apiclient/types.Item": schema_otto8_ai_otto8_apiclient_types_Item(ref), @@ -695,6 +696,12 @@ func schema_otto8_ai_otto8_apiclient_types_FileDetails(ref common.ReferenceCallb Format: "", }, }, + "checksum": { + SchemaProps: spec.SchemaProps{ + Type: []string{"string"}, + Format: "", + }, + }, }, }, }, @@ -729,6 +736,36 @@ func schema_otto8_ai_otto8_apiclient_types_FileList(ref common.ReferenceCallback } } +func schema_otto8_ai_otto8_apiclient_types_FileState(ref common.ReferenceCallback) common.OpenAPIDefinition { + return common.OpenAPIDefinition{ + Schema: spec.Schema{ + SchemaProps: spec.SchemaProps{ + Type: []string{"object"}, + Properties: map[string]spec.Schema{ + "fileName": { + SchemaProps: spec.SchemaProps{ + Type: []string{"string"}, + Format: "", + }, + }, + "folderPath": { + SchemaProps: spec.SchemaProps{ + Type: []string{"string"}, + Format: "", + }, + }, + "url": { + SchemaProps: spec.SchemaProps{ + Type: []string{"string"}, + Format: "", + }, + }, + }, + }, + }, + } +} + func schema_otto8_ai_otto8_apiclient_types_If(ref common.ReferenceCallback) common.OpenAPIDefinition { return common.OpenAPIDefinition{ Schema: spec.Schema{ @@ -1135,6 +1172,12 @@ func schema_otto8_ai_otto8_apiclient_types_NotionPage(ref common.ReferenceCallba Format: "", }, }, + "folderPath": { + SchemaProps: spec.SchemaProps{ + Type: []string{"string"}, + Format: "", + }, + }, }, }, }, @@ -1355,11 +1398,25 @@ func schema_otto8_ai_otto8_apiclient_types_OneDriveLinksConnectorState(ref commo }, }, }, + "files": { + SchemaProps: spec.SchemaProps{ + Type: []string{"object"}, + AdditionalProperties: &spec.SchemaOrBool{ + Allows: true, + Schema: &spec.Schema{ + SchemaProps: spec.SchemaProps{ + Default: map[string]interface{}{}, + Ref: ref("github.com/otto8-ai/otto8/apiclient/types.FileState"), + }, + }, + }, + }, + }, }, }, }, Dependencies: []string{ - "github.com/otto8-ai/otto8/apiclient/types.Item"}, + "github.com/otto8-ai/otto8/apiclient/types.FileState", "github.com/otto8-ai/otto8/apiclient/types.Item"}, } } @@ -1561,6 +1618,12 @@ func schema_otto8_ai_otto8_apiclient_types_RemoteKnowledgeSource(ref common.Refe Format: "", }, }, + "disableIngestionAfterSync": { + SchemaProps: spec.SchemaProps{ + Type: []string{"boolean"}, + Format: "", + }, + }, "sourceType": { SchemaProps: spec.SchemaProps{ Type: []string{"string"}, @@ -1653,6 +1716,12 @@ func schema_otto8_ai_otto8_apiclient_types_RemoteKnowledgeSourceInput(ref common SchemaProps: spec.SchemaProps{ Type: []string{"object"}, Properties: map[string]spec.Schema{ + "disableIngestionAfterSync": { + SchemaProps: spec.SchemaProps{ + Type: []string{"boolean"}, + Format: "", + }, + }, "sourceType": { SchemaProps: spec.SchemaProps{ Type: []string{"string"}, @@ -1736,6 +1805,12 @@ func schema_otto8_ai_otto8_apiclient_types_RemoteKnowledgeSourceManifest(ref com Format: "", }, }, + "disableIngestionAfterSync": { + SchemaProps: spec.SchemaProps{ + Type: []string{"boolean"}, + Format: "", + }, + }, "sourceType": { SchemaProps: spec.SchemaProps{ Type: []string{"string"}, @@ -2757,8 +2832,22 @@ func schema_otto8_ai_otto8_apiclient_types_WebsiteCrawlingConnectorState(ref com }, }, }, + "pages": { + SchemaProps: spec.SchemaProps{ + Type: []string{"object"}, + AdditionalProperties: &spec.SchemaOrBool{ + Allows: true, + Schema: &spec.Schema{ + SchemaProps: spec.SchemaProps{ + Default: map[string]interface{}{}, + Ref: ref("github.com/otto8-ai/otto8/apiclient/types.Item"), + }, + }, + }, + }, + }, }, - Required: []string{"scrapeJobIds", "folders"}, + Required: []string{"scrapeJobIds", "folders", "pages"}, }, }, Dependencies: []string{ @@ -4364,6 +4453,12 @@ func schema_storage_apis_ottogptscriptai_v1_RemoteKnowledgeSourceSpec(ref common Format: "", }, }, + "disableIngestionAfterSync": { + SchemaProps: spec.SchemaProps{ + Type: []string{"boolean"}, + Format: "", + }, + }, "sourceType": { SchemaProps: spec.SchemaProps{ Type: []string{"string"},