From 56a24dfc7cafb8cec12bb076f723570dc5288ce6 Mon Sep 17 00:00:00 2001 From: Darren Shepherd Date: Fri, 6 Dec 2024 23:22:48 -0700 Subject: [PATCH] bug: remove apply logic for better performance --- go.mod | 2 +- go.sum | 4 +- .../knowledgesource/knowledgesource.go | 69 +++++++++++++++++-- .../handlers/knowledgesource/metadata.go | 4 +- .../handlers/workflowstep/workflowstep.go | 1 - 5 files changed, 69 insertions(+), 11 deletions(-) diff --git a/go.mod b/go.mod index 1692f49bb..22ada302d 100644 --- a/go.mod +++ b/go.mod @@ -22,7 +22,7 @@ require ( github.com/mhale/smtpd v0.8.3 github.com/oauth2-proxy/oauth2-proxy/v7 v7.0.0-00010101000000-000000000000 github.com/otto8-ai/kinm v0.0.0-20241206174928-11e2e7ac3cfc - github.com/otto8-ai/nah v0.0.0-20241103035236-f2497c45ddb0 + github.com/otto8-ai/nah v0.0.0-20241207061352-8dac36960968 github.com/otto8-ai/namegenerator v0.0.0-20241115024503-63dbbe8f03f4 github.com/otto8-ai/otto8/apiclient v0.0.0-00010101000000-000000000000 github.com/otto8-ai/otto8/logger v0.0.0-00010101000000-000000000000 diff --git a/go.sum b/go.sum index 40235ec73..3e6641e71 100644 --- a/go.sum +++ b/go.sum @@ -517,8 +517,8 @@ github.com/onsi/gomega v1.34.2 h1:pNCwDkzrsv7MS9kpaQvVb1aVLahQXyJ/Tv5oAZMI3i8= github.com/onsi/gomega v1.34.2/go.mod h1:v1xfxRgk0KIsG+QOdm7p8UosrOzPYRo60fd3B/1Dukc= github.com/otto8-ai/kinm v0.0.0-20241206174928-11e2e7ac3cfc h1:3BZyCflOSU4SRvVMgJvDVfnlPlbpVc5GDj9bHFw6174= github.com/otto8-ai/kinm v0.0.0-20241206174928-11e2e7ac3cfc/go.mod h1:DOegE8RKFYiE0nmLdkJ3YDR668njuEmmqHE4q5qVNqU= -github.com/otto8-ai/nah v0.0.0-20241103035236-f2497c45ddb0 h1:I+4dYCCPY58dbABkbtEGtLQTx/tJvbZgkfcLj889yQM= -github.com/otto8-ai/nah v0.0.0-20241103035236-f2497c45ddb0/go.mod h1:mo2Dz5pAyZhAVvo+kQEzXZlLzb0SaNzhIate7BQs4Vg= +github.com/otto8-ai/nah v0.0.0-20241207061352-8dac36960968 h1:jW4n9CcBdYQnlOdCYSH2y55OevlycMtylIJz8g4WT2E= +github.com/otto8-ai/nah v0.0.0-20241207061352-8dac36960968/go.mod h1:mo2Dz5pAyZhAVvo+kQEzXZlLzb0SaNzhIate7BQs4Vg= github.com/otto8-ai/namegenerator v0.0.0-20241115024503-63dbbe8f03f4 h1:w/VS6mtnVWy6iDxF4P8W66OuTk8dudWTJZzEtInegLU= github.com/otto8-ai/namegenerator v0.0.0-20241115024503-63dbbe8f03f4/go.mod h1:Z4EotUCaCQ8PA+vS0ZDfTPh7JO9jTgpDoHxA3Zzwwjw= github.com/otto8-ai/oauth2-proxy/v7 v7.0.0-20241008204315-265dabe17f43 h1:zuroXr125FGRsaj9/9brmtWfkiu3XkNoB78owd8lAo4= diff --git a/pkg/controller/handlers/knowledgesource/knowledgesource.go b/pkg/controller/handlers/knowledgesource/knowledgesource.go index 6c7a6bf15..ce8674208 100644 --- a/pkg/controller/handlers/knowledgesource/knowledgesource.go +++ b/pkg/controller/handlers/knowledgesource/knowledgesource.go @@ -6,7 +6,6 @@ import ( "time" "github.com/gptscript-ai/go-gptscript" - "github.com/otto8-ai/nah/pkg/apply" "github.com/otto8-ai/nah/pkg/name" "github.com/otto8-ai/nah/pkg/router" "github.com/otto8-ai/otto8/apiclient/types" @@ -64,17 +63,77 @@ func safeStatusSave(ctx context.Context, c kclient.Client, source *v1.KnowledgeS // This should be the error from the last loop, which should be a conflict return err } + +func reconcileFiles(ctx context.Context, c kclient.Client, existingFiles, newFiles []v1.KnowledgeFile, complete bool) error { + existingNames := map[string]v1.KnowledgeFile{} + for _, file := range existingFiles { + existingNames[file.Name] = file + } + + newNames := map[string]v1.KnowledgeFile{} + for _, file := range newFiles { + newNames[file.Name] = file + } + + for newName, newFile := range newNames { + existingFile, ok := existingNames[newName] + if !ok { + if err := c.Create(ctx, &newFile); apierror.IsAlreadyExists(err) { + if err := c.Get(ctx, kclient.ObjectKeyFromObject(&newFile), &existingFile); err != nil { + return err + } + } else if err != nil { + return err + } else { + continue + } + } + delete(existingNames, newName) + + if existingFile.Spec.FileName != newFile.Spec.FileName || + existingFile.Spec.URL != newFile.Spec.URL || + existingFile.Spec.UpdatedAt != newFile.Spec.UpdatedAt || + existingFile.Spec.Checksum != newFile.Spec.Checksum || + existingFile.Spec.SizeInBytes != newFile.Spec.SizeInBytes { + + existingFile.Spec.FileName = newFile.Spec.FileName + existingFile.Spec.URL = newFile.Spec.URL + existingFile.Spec.UpdatedAt = newFile.Spec.UpdatedAt + existingFile.Spec.Checksum = newFile.Spec.Checksum + existingFile.Spec.SizeInBytes = newFile.Spec.SizeInBytes + + if err := c.Update(ctx, &existingFile); err != nil { + return err + } + } + } + + if complete { + for _, existingFile := range existingNames { + if err := c.Delete(ctx, &existingFile); err != nil { + return err + } + } + } + + return nil +} + func (k *Handler) saveProgress(ctx context.Context, c kclient.Client, source *v1.KnowledgeSource, thread *v1.Thread, complete bool) error { files, syncMetadata, err := k.getMetadata(ctx, source, thread) if err != nil || syncMetadata == nil { return err } - apply := apply.New(c) - if !complete { - apply = apply.WithNoPrune() + var existing v1.KnowledgeFileList + err = c.List(ctx, &existing, kclient.InNamespace(source.Namespace), kclient.MatchingFields{ + "spec.knowledgeSourceName": source.Name, + }) + if err != nil { + return err } - if err = apply.Apply(ctx, source, files...); err != nil { + + if err := reconcileFiles(ctx, c, existing.Items, files, complete); err != nil { return err } diff --git a/pkg/controller/handlers/knowledgesource/metadata.go b/pkg/controller/handlers/knowledgesource/metadata.go index afae633f1..835f4403f 100644 --- a/pkg/controller/handlers/knowledgesource/metadata.go +++ b/pkg/controller/handlers/knowledgesource/metadata.go @@ -43,7 +43,7 @@ type syncMetadata struct { State map[string]any `json:"state,omitempty"` } -func (k *Handler) getMetadata(ctx context.Context, source *v1.KnowledgeSource, thread *v1.Thread) (result []kclient.Object, _ *syncMetadata, _ error) { +func (k *Handler) getMetadata(ctx context.Context, source *v1.KnowledgeSource, thread *v1.Thread) (result []v1.KnowledgeFile, _ *syncMetadata, _ error) { data, err := k.gptClient.ReadFileInWorkspace(ctx, ".metadata.json", gptscript.ReadFileInWorkspaceOptions{ WorkspaceID: thread.Status.WorkspaceID, }) @@ -60,7 +60,7 @@ func (k *Handler) getMetadata(ctx context.Context, source *v1.KnowledgeSource, t } for _, file := range output.Files { - result = append(result, &v1.KnowledgeFile{ + result = append(result, v1.KnowledgeFile{ ObjectMeta: metav1.ObjectMeta{ Name: v1.ObjectNameFromAbsolutePath(filepath.Join(thread.Status.WorkspaceID, file.FilePath)), Namespace: source.Namespace, diff --git a/pkg/controller/handlers/workflowstep/workflowstep.go b/pkg/controller/handlers/workflowstep/workflowstep.go index 9bd636016..2bee431c3 100644 --- a/pkg/controller/handlers/workflowstep/workflowstep.go +++ b/pkg/controller/handlers/workflowstep/workflowstep.go @@ -89,7 +89,6 @@ func (h *Handler) Preconditions(next router.Handler) router.Handler { return next.Handle(req, resp) } - resp.DisablePrune() return nil }) }