From 6ae38dc0b96bda488c3be02ee48d4c43ba181093 Mon Sep 17 00:00:00 2001 From: Donnie Adams Date: Mon, 23 Sep 2024 17:44:03 -0400 Subject: [PATCH] feat: add KnowledgeFile CR Signed-off-by: Donnie Adams --- pkg/api/handlers/agent.go | 27 +- pkg/api/handlers/files.go | 95 ++++- pkg/api/handlers/threads.go | 27 +- pkg/api/handlers/workflows.go | 23 +- pkg/api/router/router.go | 6 +- .../handlers/knowledge/knowledge.go | 168 ++++++-- pkg/controller/handlers/runs/runs.go | 5 +- pkg/controller/handlers/threads/threads.go | 14 +- pkg/controller/handlers/uploads/onedrive.go | 172 ++++++++- .../handlers/workspace/workspace.go | 11 +- pkg/controller/routes.go | 17 +- pkg/invoke/invoker.go | 10 +- pkg/knowledge/knowledge.go | 53 +-- pkg/render/render.go | 8 +- .../apis/otto.gptscript.ai/v1/agent.go | 17 +- pkg/storage/apis/otto.gptscript.ai/v1/file.go | 94 +++++ pkg/storage/apis/otto.gptscript.ai/v1/run.go | 10 +- .../apis/otto.gptscript.ai/v1/scheme.go | 9 +- .../apis/otto.gptscript.ai/v1/thread.go | 24 +- .../apis/otto.gptscript.ai/v1/upload.go | 22 +- .../apis/otto.gptscript.ai/v1/workflow.go | 16 +- .../v1/zz_generated.deepcopy.go | 142 ++++++- .../openapi/generated/openapi_generated.go | 360 +++++++++++++++++- pkg/storage/selectors/selectors.go | 10 + 24 files changed, 1123 insertions(+), 217 deletions(-) create mode 100644 pkg/storage/apis/otto.gptscript.ai/v1/file.go create mode 100644 pkg/storage/selectors/selectors.go diff --git a/pkg/api/handlers/agent.go b/pkg/api/handlers/agent.go index 9b5082685..d4045bff0 100644 --- a/pkg/api/handlers/agent.go +++ b/pkg/api/handlers/agent.go @@ -71,7 +71,6 @@ func (a *AgentHandler) Create(req api.Context) error { ObjectMeta: metav1.ObjectMeta{ GenerateName: system.AgentPrefix, Namespace: req.Namespace(), - Finalizers: []string{v1.AgentFinalizer}, }, Spec: v1.AgentSpec{ Manifest: manifest, @@ -146,33 +145,29 @@ func (a *AgentHandler) UploadFile(req api.Context) error { return fmt.Errorf("failed to get agent with id %s: %w", id, err) } - return uploadFile(req.Context(), req, a.workspaceClient, agent.Status.Workspace.WorkspaceID) + if err := uploadFile(req.Context(), req, a.workspaceClient, agent.Status.Workspace.WorkspaceID); err != nil { + return err + } + + req.WriteHeader(http.StatusCreated) + return nil } func (a *AgentHandler) DeleteFile(req api.Context) error { var ( - id = req.PathValue("id") - filename = req.PathValue("file") - agent v1.Agent + id = req.PathValue("id") + agent v1.Agent ) if err := req.Get(&agent, id); err != nil { return fmt.Errorf("failed to get agent with id %s: %w", id, err) } - return deleteFile(req.Context(), req, a.workspaceClient, agent.Status.Workspace.WorkspaceID, filename) + return deleteFile(req.Context(), req, a.workspaceClient, agent.Status.Workspace.WorkspaceID) } func (a *AgentHandler) Knowledge(req api.Context) error { - var ( - id = req.PathValue("id") - agent v1.Agent - ) - if err := req.Get(&agent, id); err != nil { - return fmt.Errorf("failed to get agent with id %s: %w", id, err) - } - - return listFiles(req.Context(), req, a.workspaceClient, agent.Status.KnowledgeWorkspace.KnowledgeWorkspaceID) + return listKnowledgeFiles(req, new(v1.Agent)) } func (a *AgentHandler) UploadKnowledge(req api.Context) error { @@ -180,7 +175,7 @@ func (a *AgentHandler) UploadKnowledge(req api.Context) error { } func (a *AgentHandler) DeleteKnowledge(req api.Context) error { - return deleteKnowledge(req, a.workspaceClient, req.PathValue("file"), req.PathValue("id"), new(v1.Agent)) + return deleteKnowledge(req, req.PathValue("file"), req.PathValue("id"), new(v1.Agent)) } func (a *AgentHandler) IngestKnowledge(req api.Context) error { diff --git a/pkg/api/handlers/files.go b/pkg/api/handlers/files.go index 80466da79..571263767 100644 --- a/pkg/api/handlers/files.go +++ b/pkg/api/handlers/files.go @@ -2,14 +2,24 @@ package handlers import ( "context" + "errors" "fmt" "io" "net/http" + "path/filepath" + "strings" "github.com/gptscript-ai/otto/pkg/api" "github.com/gptscript-ai/otto/pkg/api/types" "github.com/gptscript-ai/otto/pkg/knowledge" + v1 "github.com/gptscript-ai/otto/pkg/storage/apis/otto.gptscript.ai/v1" + "github.com/gptscript-ai/otto/pkg/storage/selectors" + "github.com/gptscript-ai/otto/pkg/workspace" wclient "github.com/thedadams/workspace-provider/pkg/client" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/fields" + "sigs.k8s.io/controller-runtime/pkg/client" ) func listFiles(ctx context.Context, req api.Context, wc *wclient.Client, workspaceID string) error { @@ -21,19 +31,64 @@ func listFiles(ctx context.Context, req api.Context, wc *wclient.Client, workspa return req.Write(types.FileList{Items: files}) } +func listKnowledgeFiles(req api.Context, parentObj knowledge.Knowledgeable) error { + if err := req.Get(parentObj, req.PathValue("id")); err != nil { + return fmt.Errorf("failed to get the parent object: %w", err) + } + + var files v1.KnowledgeFileList + if err := req.Storage.List(req.Context(), &files, &client.ListOptions{ + FieldSelector: fields.SelectorFromSet(selectors.RemoveEmpty(map[string]string{ + "spec.agentName": parentObj.AgentName(), + "spec.workflowName": parentObj.WorkflowName(), + "spec.threadName": parentObj.ThreadName(), + })), + Namespace: parentObj.GetNamespace(), + }); err != nil { + return err + } + + return req.Write(files) +} + func uploadKnowledge(req api.Context, workspaceClient *wclient.Client, parentName string, toUpdate knowledge.Knowledgeable) error { if err := req.Get(toUpdate, parentName); err != nil { return fmt.Errorf("failed to get parent with id %s: %w", req.PathValue("id"), err) } - status := toUpdate.GetKnowledgeWorkspaceStatus() + status := toUpdate.KnowledgeWorkspaceStatus() if err := uploadFile(req.Context(), req, workspaceClient, status.KnowledgeWorkspaceID); err != nil { return err } + filename := req.PathValue("file") + file := &v1.KnowledgeFile{ + ObjectMeta: metav1.ObjectMeta{ + Name: v1.ObjectNameFromAbsolutePath( + filepath.Join(workspace.GetDir(status.KnowledgeWorkspaceID), filename), + ), + Namespace: toUpdate.GetNamespace(), + }, + Spec: v1.KnowledgeFileSpec{ + FileName: filename, + AgentName: toUpdate.AgentName(), + WorkflowName: toUpdate.WorkflowName(), + ThreadName: toUpdate.ThreadName(), + }, + } + + if err := req.Storage.Create(req.Context(), file); err != nil && !apierrors.IsAlreadyExists(err) { + _ = deleteFile(req.Context(), req, workspaceClient, status.KnowledgeWorkspaceID) + return err + } + status.KnowledgeGeneration++ status.HasKnowledge = true - return req.Storage.Status().Update(req.Context(), toUpdate) + if err := req.Storage.Status().Update(req.Context(), toUpdate); err != nil { + return err + } + + return req.Write(file) } func uploadFile(ctx context.Context, req api.Context, wc *wclient.Client, workspaceID string) error { @@ -52,32 +107,38 @@ func uploadFile(ctx context.Context, req api.Context, wc *wclient.Client, worksp return fmt.Errorf("failed to write file %q to workspace %q: %w", file, workspaceID, err) } - req.WriteHeader(http.StatusCreated) - return nil } -func deleteKnowledge(req api.Context, workspaceClient *wclient.Client, filename, parentName string, toUpdate knowledge.Knowledgeable) error { +func deleteKnowledge(req api.Context, filename, parentName string, toUpdate knowledge.Knowledgeable) error { if err := req.Get(toUpdate, parentName); err != nil { return fmt.Errorf("failed to get parent with id %s: %w", parentName, err) } - status := toUpdate.GetKnowledgeWorkspaceStatus() - if err := deleteFile(req.Context(), req, workspaceClient, status.KnowledgeWorkspaceID, filename); err != nil { + fileObjectName := v1.ObjectNameFromAbsolutePath( + filepath.Join(workspace.GetDir(toUpdate.KnowledgeWorkspaceStatus().KnowledgeWorkspaceID), filename), + ) + + if err := req.Storage.Delete(req.Context(), &v1.KnowledgeFile{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: toUpdate.GetNamespace(), + Name: fileObjectName, + }, + }); err != nil { + var apiErr *apierrors.StatusError + if errors.As(err, &apiErr) { + apiErr.ErrStatus.Details.Name = filename + apiErr.ErrStatus.Message = strings.ReplaceAll(apiErr.ErrStatus.Message, fileObjectName, filename) + } return err } - files, err := workspaceClient.Ls(req.Context(), status.KnowledgeWorkspaceID) - if err != nil { - return fmt.Errorf("failed to list files in workspace %s: %w", status.KnowledgeWorkspaceID, err) - } - - status.KnowledgeGeneration++ - status.HasKnowledge = len(files) > 0 - return req.Storage.Status().Update(req.Context(), toUpdate) + req.WriteHeader(http.StatusNoContent) + return nil } -func deleteFile(ctx context.Context, req api.Context, wc *wclient.Client, workspaceID, filename string) error { +func deleteFile(ctx context.Context, req api.Context, wc *wclient.Client, workspaceID string) error { + filename := req.PathValue("file") if err := wc.DeleteFile(ctx, workspaceID, filename); err != nil { return fmt.Errorf("failed to delete file %q from workspace %q: %w", filename, workspaceID, err) } @@ -92,7 +153,7 @@ func ingestKnowledge(req api.Context, workspaceClient *wclient.Client, parentNam return fmt.Errorf("failed to get parent with id %s: %w", req.PathValue("id"), err) } - status := toUpdate.GetKnowledgeWorkspaceStatus() + status := toUpdate.KnowledgeWorkspaceStatus() files, err := workspaceClient.Ls(req.Context(), status.KnowledgeWorkspaceID) if err != nil { return err diff --git a/pkg/api/handlers/threads.go b/pkg/api/handlers/threads.go index 98d6dbe37..3057c03cf 100644 --- a/pkg/api/handlers/threads.go +++ b/pkg/api/handlers/threads.go @@ -2,6 +2,7 @@ package handlers import ( "fmt" + "net/http" "github.com/gptscript-ai/otto/pkg/api" "github.com/gptscript-ai/otto/pkg/api/types" @@ -136,33 +137,29 @@ func (a *ThreadHandler) UploadFile(req api.Context) error { return fmt.Errorf("failed to get thread with id %s: %w", id, err) } - return uploadFile(req.Context(), req, a.workspaceClient, thread.Spec.WorkspaceID) + if err := uploadFile(req.Context(), req, a.workspaceClient, thread.Spec.WorkspaceID); err != nil { + return err + } + + req.WriteHeader(http.StatusCreated) + return nil } func (a *ThreadHandler) DeleteFile(req api.Context) error { var ( - id = req.PathValue("id") - filename = req.PathValue("file") - thread v1.Thread + id = req.PathValue("id") + thread v1.Thread ) if err := req.Get(&thread, id); err != nil { return fmt.Errorf("failed to get thread with id %s: %w", id, err) } - return deleteFile(req.Context(), req, a.workspaceClient, thread.Spec.WorkspaceID, filename) + return deleteFile(req.Context(), req, a.workspaceClient, thread.Spec.WorkspaceID) } func (a *ThreadHandler) Knowledge(req api.Context) error { - var ( - id = req.PathValue("id") - thread v1.Thread - ) - if err := req.Get(&thread, id); err != nil { - return fmt.Errorf("failed to get thread with id %s: %w", id, err) - } - - return listFiles(req.Context(), req, a.workspaceClient, thread.Spec.KnowledgeWorkspaceID) + return listKnowledgeFiles(req, new(v1.Thread)) } func (a *ThreadHandler) UploadKnowledge(req api.Context) error { @@ -170,7 +167,7 @@ func (a *ThreadHandler) UploadKnowledge(req api.Context) error { } func (a *ThreadHandler) DeleteKnowledge(req api.Context) error { - return deleteKnowledge(req, a.workspaceClient, req.PathValue("file"), req.PathValue("id"), new(v1.Thread)) + return deleteKnowledge(req, req.PathValue("file"), req.PathValue("id"), new(v1.Thread)) } func (a *ThreadHandler) IngestKnowledge(req api.Context) error { diff --git a/pkg/api/handlers/workflows.go b/pkg/api/handlers/workflows.go index 01267442d..678f9d431 100644 --- a/pkg/api/handlers/workflows.go +++ b/pkg/api/handlers/workflows.go @@ -75,7 +75,6 @@ func (a *WorkflowHandler) Create(req api.Context) error { ObjectMeta: metav1.ObjectMeta{ GenerateName: system.WorkflowPrefix, Namespace: req.Namespace(), - Finalizers: []string{v1.WorkflowFinalizer}, }, Spec: v1.WorkflowSpec{ Manifest: manifest, @@ -150,13 +149,17 @@ func (a *WorkflowHandler) UploadFile(req api.Context) error { return fmt.Errorf("failed to get workflow with id %s: %w", id, err) } - return uploadFile(req.Context(), req, a.workspaceClient, workflow.Status.Workspace.WorkspaceID) + if err := uploadFile(req.Context(), req, a.workspaceClient, workflow.Status.Workspace.WorkspaceID); err != nil { + return err + } + + req.WriteHeader(http.StatusCreated) + return nil } func (a *WorkflowHandler) DeleteFile(req api.Context) error { var ( id = req.PathValue("id") - filename = req.PathValue("file") workflow v1.Workflow ) @@ -164,19 +167,11 @@ func (a *WorkflowHandler) DeleteFile(req api.Context) error { return fmt.Errorf("failed to get workflow with id %s: %w", id, err) } - return deleteFile(req.Context(), req, a.workspaceClient, workflow.Status.Workspace.WorkspaceID, filename) + return deleteFile(req.Context(), req, a.workspaceClient, workflow.Status.Workspace.WorkspaceID) } func (a *WorkflowHandler) Knowledge(req api.Context) error { - var ( - id = req.PathValue("id") - workflow v1.Workflow - ) - if err := req.Get(&workflow, id); err != nil { - return fmt.Errorf("failed to get workflow with id %s: %w", id, err) - } - - return listFiles(req.Context(), req, a.workspaceClient, workflow.Status.KnowledgeWorkspace.KnowledgeWorkspaceID) + return listKnowledgeFiles(req, new(v1.Workflow)) } func (a *WorkflowHandler) UploadKnowledge(req api.Context) error { @@ -184,7 +179,7 @@ func (a *WorkflowHandler) UploadKnowledge(req api.Context) error { } func (a *WorkflowHandler) DeleteKnowledge(req api.Context) error { - return deleteKnowledge(req, a.workspaceClient, req.PathValue("file"), req.PathValue("id"), new(v1.Workflow)) + return deleteKnowledge(req, req.PathValue("file"), req.PathValue("id"), new(v1.Workflow)) } func (a *WorkflowHandler) IngestKnowledge(req api.Context) error { diff --git a/pkg/api/router/router.go b/pkg/api/router/router.go index c155537a8..2a30c824b 100644 --- a/pkg/api/router/router.go +++ b/pkg/api/router/router.go @@ -34,7 +34,7 @@ func Router(services *services.Services) (http.Handler, error) { mux.Handle("GET /agents/{id}/knowledge", w(agents.Knowledge)) mux.Handle("POST /agents/{id}/knowledge", w(agents.IngestKnowledge)) mux.Handle("POST /agents/{id}/knowledge/{file}", w(agents.UploadKnowledge)) - mux.Handle("DELETE /agents/{id}/knowledge/{file}", w(agents.DeleteKnowledge)) + mux.Handle("DELETE /agents/{id}/knowledge/{file...}", w(agents.DeleteKnowledge)) mux.Handle("POST /agents/{agent_id}/onedrive-links", w(agents.CreateOnedriveLinks)) mux.Handle("GET /agents/{agent_id}/onedrive-links", w(agents.GetOnedriveLinks)) @@ -59,7 +59,7 @@ func Router(services *services.Services) (http.Handler, error) { mux.Handle("GET /workflows/{id}/knowledge", w(workflows.Knowledge)) mux.Handle("POST /workflows/{id}/knowledge", w(workflows.IngestKnowledge)) mux.Handle("POST /workflows/{id}/knowledge/{file}", w(workflows.UploadKnowledge)) - mux.Handle("DELETE /workflows/{id}/knowledge/{file}", w(workflows.DeleteKnowledge)) + mux.Handle("DELETE /workflows/{id}/knowledge/{file...}", w(workflows.DeleteKnowledge)) mux.Handle("POST /workflows/{workflow_id}/onedrive-links", w(workflows.CreateOnedriveLinks)) mux.Handle("GET /workflows/{workflow_id}/onedrive-links", w(workflows.GetOnedriveLinks)) @@ -87,7 +87,7 @@ func Router(services *services.Services) (http.Handler, error) { mux.Handle("GET /threads/{id}/knowledge", w(threads.Knowledge)) mux.Handle("POST /threads/{id}/knowledge", w(threads.IngestKnowledge)) mux.Handle("POST /threads/{id}/knowledge/{file}", w(threads.UploadKnowledge)) - mux.Handle("DELETE /threads/{id}/knowledge/{file}", w(threads.DeleteKnowledge)) + mux.Handle("DELETE /threads/{id}/knowledge/{file...}", w(threads.DeleteKnowledge)) // Runs mux.Handle("GET /runs", w(runs.List)) diff --git a/pkg/controller/handlers/knowledge/knowledge.go b/pkg/controller/handlers/knowledge/knowledge.go index 5803e3752..1f5ce03fe 100644 --- a/pkg/controller/handlers/knowledge/knowledge.go +++ b/pkg/controller/handlers/knowledge/knowledge.go @@ -1,10 +1,21 @@ package knowledge import ( + "context" + "encoding/json" + "errors" + "fmt" + "strings" + "github.com/acorn-io/baaah/pkg/router" + "github.com/gptscript-ai/otto/pkg/invoke" "github.com/gptscript-ai/otto/pkg/knowledge" + "github.com/gptscript-ai/otto/pkg/mvl" v1 "github.com/gptscript-ai/otto/pkg/storage/apis/otto.gptscript.ai/v1" wclient "github.com/thedadams/workspace-provider/pkg/client" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/client-go/util/retry" + kclient "sigs.k8s.io/controller-runtime/pkg/client" ) type Handler struct { @@ -23,7 +34,7 @@ func New(wc *wclient.Client, ingester *knowledge.Ingester, wp string) *Handler { func (a *Handler) CreateWorkspace(req router.Request, resp router.Response) error { knowledged := req.Object.(knowledge.Knowledgeable) - status := knowledged.GetKnowledgeWorkspaceStatus() + status := knowledged.KnowledgeWorkspaceStatus() if status.KnowledgeWorkspaceID != "" { return nil } @@ -44,21 +55,19 @@ func (a *Handler) CreateWorkspace(req router.Request, resp router.Response) erro return nil } -func (a *Handler) RemoveWorkspace(req router.Request, resp router.Response) error { +func (a *Handler) RemoveWorkspace(req router.Request, _ router.Response) error { knowledged := req.Object.(knowledge.Knowledgeable) - status := knowledged.GetKnowledgeWorkspaceStatus() + status := knowledged.KnowledgeWorkspaceStatus() if status.HasKnowledge { - var agentName string - switch obj := knowledged.(type) { - case *v1.Agent: - agentName = obj.Name - case *v1.Thread: - agentName = obj.Spec.AgentName + run, err := a.ingester.DeleteKnowledge(req.Ctx, knowledged.AgentName(), knowledged.GetNamespace(), status.KnowledgeWorkspaceID) + if err != nil { + return err } - if err := a.ingester.DeleteKnowledge(req.Ctx, agentName, knowledged.GetNamespace(), status.KnowledgeWorkspaceID); err != nil { - return err + run.Wait() + if run.Run.Status.Error != "" { + return fmt.Errorf("failed to delete knowledge: %s", run.Run.Status.Error) } } @@ -69,26 +78,139 @@ func (a *Handler) RemoveWorkspace(req router.Request, resp router.Response) erro return nil } -// TODO(thedadams): add another handler that pulls the status logs off the run and stores them. -func (a *Handler) IngestKnowledge(req router.Request, resp router.Response) error { +func (a *Handler) IngestKnowledge(req router.Request, _ router.Response) error { knowledged := req.Object.(knowledge.Knowledgeable) - status := knowledged.GetKnowledgeWorkspaceStatus() - if status.KnowledgeGeneration == status.ObservedKnowledgeGeneration || !status.HasKnowledge { + status := knowledged.KnowledgeWorkspaceStatus() + if status.KnowledgeGeneration == status.ObservedKnowledgeGeneration || status.IngestionRunName != "" { + // If the RunName is set, then there is an ingestion in progress. + // Wait for it to complete before starting another. return nil } - var agentName string - switch obj := knowledged.(type) { - case *v1.Agent: - agentName = obj.Name - case *v1.Thread: - agentName = obj.Spec.AgentName - } + var ( + run *invoke.Response + err error + ) - if err := a.ingester.IngestKnowledge(req.Ctx, agentName, knowledged.GetNamespace(), status.KnowledgeWorkspaceID); err != nil { + run, err = a.ingester.IngestKnowledge(req.Ctx, knowledged.AgentName(), knowledged.GetNamespace(), status.KnowledgeWorkspaceID) + if err != nil { return err } + go compileFileStatuses(req.Ctx, req.Client, knowledged, run, mvl.Package()) + status.ObservedKnowledgeGeneration = status.KnowledgeGeneration + status.IngestionRunName = run.Run.Name return nil } + +func compileFileStatuses(ctx context.Context, client kclient.Client, knowledged knowledge.Knowledgeable, run *invoke.Response, logger mvl.Logger) { + for e := range run.Events { + for _, line := range strings.Split(e.Content, "\n") { + if line == "" || line[0] != '{' { + continue + } + var ingestionStatus v1.IngestionStatus + if err := json.Unmarshal([]byte(line), &ingestionStatus); err != nil { + logger.Errorf("failed to unmarshal event: %s", err) + } + + if ingestionStatus.Filepath == "" { + // Not a file status log. + continue + } + + var file v1.KnowledgeFile + if err := client.Get(ctx, router.Key(knowledged.GetNamespace(), v1.ObjectNameFromAbsolutePath(ingestionStatus.Filepath)), &file); err != nil { + logger.Errorf("failed to get file: %s", err) + continue + } + + if err := json.Unmarshal([]byte(line), &file.Status.IngestionStatus); err != nil { + logger.Errorf("failed to into file status: %s", err) + } + + if err := client.Status().Update(ctx, &file); err != nil { + logger.Errorf("failed to update file: %s", err) + } + } + } + + if err := retry.OnError(retry.DefaultRetry, func(err error) bool { + return !apierrors.IsNotFound(err) + }, func() error { + if err := client.Get(ctx, router.Key(knowledged.GetNamespace(), knowledged.GetName()), knowledged); err != nil { + return err + } + + newStatus := knowledged.KnowledgeWorkspaceStatus() + newStatus.IngestionRunName = "" + return client.Status().Update(ctx, knowledged) + }); err != nil { + logger.Errorf("failed to update status: %s", err) + } +} + +func (a *Handler) GCFile(req router.Request, _ router.Response) error { + kFile := req.Object.(*v1.KnowledgeFile) + + if parent, err := knowledgeFileParent(req.Ctx, req.Client, kFile); apierrors.IsNotFound(err) || !parent.GetDeletionTimestamp().IsZero() { + return kclient.IgnoreNotFound(req.Delete(kFile)) + } else if err != nil { + return err + } + + return nil +} + +func (a *Handler) CleanupFile(req router.Request, _ router.Response) error { + kFile := req.Object.(*v1.KnowledgeFile) + + parent, err := knowledgeFileParent(req.Ctx, req.Client, kFile) + if apierrors.IsNotFound(err) { + // If the parent object is not found, then the workspaces will be deleted and nothing needs to happen here. + return nil + } + if err != nil { + return err + } + + status := parent.KnowledgeWorkspaceStatus() + + if err = a.workspaceClient.DeleteFile(req.Ctx, status.KnowledgeWorkspaceID, kFile.Spec.FileName); err != nil { + if errors.As(err, new(wclient.FileNotFoundError)) { + // It is important to return nil here and not move forward because when bulk deleting files from a remote provider + // (like OneDrive), the connector will remove the files from the local disk and the controller will remove the + // KnowledgeFile objects. We don't want to kick off (possibly) numerous ingestion runs. + return nil + } + return err + } + + files, err := a.workspaceClient.Ls(req.Ctx, status.KnowledgeWorkspaceID) + if err != nil { + return fmt.Errorf("failed to list files in workspace %s: %w", status.KnowledgeWorkspaceID, err) + } + + status.KnowledgeGeneration++ + status.HasKnowledge = len(files) > 0 + return req.Client.Status().Update(req.Ctx, parent) +} + +func knowledgeFileParent(ctx context.Context, client kclient.Client, kFile *v1.KnowledgeFile) (knowledge.Knowledgeable, error) { + switch { + case kFile.Spec.ThreadName != "": + var thread v1.Thread + return &thread, client.Get(ctx, router.Key(kFile.Namespace, kFile.Spec.ThreadName), &thread) + + case kFile.Spec.AgentName != "": + var agent v1.Agent + return &agent, client.Get(ctx, router.Key(kFile.Namespace, kFile.Spec.AgentName), &agent) + + case kFile.Spec.WorkflowName != "": + var workflow v1.Workflow + return &workflow, client.Get(ctx, router.Key(kFile.Namespace, kFile.Spec.WorkflowName), &workflow) + } + + return nil, fmt.Errorf("unable to find parent for knowledge file %s", kFile.Name) +} diff --git a/pkg/controller/handlers/runs/runs.go b/pkg/controller/handlers/runs/runs.go index 952460e6a..4f94f4dab 100644 --- a/pkg/controller/handlers/runs/runs.go +++ b/pkg/controller/handlers/runs/runs.go @@ -8,6 +8,7 @@ import ( v1 "github.com/gptscript-ai/otto/pkg/storage/apis/otto.gptscript.ai/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" ) var log = mvl.Package() @@ -22,12 +23,12 @@ func New(invoker *invoke.Invoker) *Handler { func (*Handler) DeleteRunState(req router.Request, resp router.Response) error { run := req.Object.(*v1.Run) - return req.Delete(&v1.RunState{ + return client.IgnoreNotFound(req.Delete(&v1.RunState{ ObjectMeta: metav1.ObjectMeta{ Name: run.Name, Namespace: run.Namespace, }, - }) + })) } func (*Handler) Cleanup(req router.Request, resp router.Response) error { diff --git a/pkg/controller/handlers/threads/threads.go b/pkg/controller/handlers/threads/threads.go index 078c54fed..fbc75dcaa 100644 --- a/pkg/controller/handlers/threads/threads.go +++ b/pkg/controller/handlers/threads/threads.go @@ -25,7 +25,17 @@ func New(workspace *wclient.Client, ingester *knowledge.Ingester, aihelper *aihe } } -func (t *ThreadHandler) Cleanup(req router.Request, resp router.Response) error { +func (t *ThreadHandler) MoveWorkspacesToStatus(req router.Request, _ router.Response) error { + thread := req.Object.(*v1.Thread) + if thread.Status.Workspace.WorkspaceID == "" || thread.Status.KnowledgeWorkspace.KnowledgeWorkspaceID == "" { + thread.Status.Workspace.WorkspaceID = thread.Spec.WorkspaceID + thread.Status.KnowledgeWorkspace.KnowledgeWorkspaceID = thread.Spec.KnowledgeWorkspaceID + } + + return nil +} + +func (t *ThreadHandler) Cleanup(req router.Request, _ router.Response) error { thread := req.Object.(*v1.Thread) if thread.Spec.AgentName != "" { @@ -83,7 +93,7 @@ func (t *ThreadHandler) HasKnowledge(handler router.Handler) router.Handler { }) } -func (t *ThreadHandler) Description(req router.Request, resp router.Response) error { +func (t *ThreadHandler) Description(req router.Request, _ router.Response) error { thread := req.Object.(*v1.Thread) if thread.Spec.Manifest.Description != "" || thread.Status.LastRunName == "" { diff --git a/pkg/controller/handlers/uploads/onedrive.go b/pkg/controller/handlers/uploads/onedrive.go index 46e766f72..c40f5aee0 100644 --- a/pkg/controller/handlers/uploads/onedrive.go +++ b/pkg/controller/handlers/uploads/onedrive.go @@ -1,19 +1,25 @@ package uploads import ( + "context" "encoding/json" + "errors" "fmt" "path/filepath" "github.com/acorn-io/baaah/pkg/router" + "github.com/acorn-io/baaah/pkg/uncached" "github.com/gptscript-ai/otto/pkg/invoke" + "github.com/gptscript-ai/otto/pkg/knowledge" v1 "github.com/gptscript-ai/otto/pkg/storage/apis/otto.gptscript.ai/v1" "github.com/gptscript-ai/otto/pkg/system" "github.com/gptscript-ai/otto/pkg/workspace" wclient "github.com/thedadams/workspace-provider/pkg/client" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/fields" "k8s.io/apimachinery/pkg/labels" + "k8s.io/client-go/util/retry" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -48,7 +54,6 @@ func (u *UploadHandler) CreateThread(req router.Request, _ router.Response) erro ObjectMeta: metav1.ObjectMeta{ GenerateName: system.ThreadPrefix, Namespace: req.Namespace, - Finalizers: []string{v1.ThreadFinalizer}, Labels: map[string]string{ v1.OneDriveLinksLabel: oneDriveLinks.Name, }, @@ -84,11 +89,23 @@ func (u *UploadHandler) RunUpload(req router.Request, _ router.Response) error { return err } - _, status, err := parentObjAndStatus(req, oneDriveLinks) + parentObj, status, err := parentObjAndStatus(req, oneDriveLinks) + if err != nil { + return err + } + + files, err := knowledgeFilesForParent(req, parentObj) if err != nil { return err } + output := map[string]any{ + "files": compileKnowledgeFilesForOneDriveConnector(files, oneDriveLinks.Name), + "folders": oneDriveLinks.Status.Folders, + "status": oneDriveLinks.Status.Status, + "error": oneDriveLinks.Status.Error, + } + writer, err := u.workspaceClient.WriteFile(req.Ctx, thread.Spec.WorkspaceID, ".metadata.json") if err != nil { return fmt.Errorf("failed to create metadata file: %w", err) @@ -99,6 +116,7 @@ func (u *UploadHandler) RunUpload(req router.Request, _ router.Response) error { "sharedLinks": oneDriveLinks.Spec.SharedLinks, "outputDir": filepath.Join(workspace.GetDir(status.KnowledgeWorkspaceID), oneDriveLinks.Name), }, + "output": output, }) if _, err = writer.Write(b); err != nil { @@ -147,12 +165,13 @@ func (u *UploadHandler) HandleUploadRun(req router.Request, _ router.Response) e return err } + // Read the output metadata from the connector tool. file, err := u.workspaceClient.OpenFile(req.Ctx, thread.Spec.WorkspaceID, ".metadata.json") if err != nil { return fmt.Errorf("failed to create metadata file: %w", err) } - var output map[string]v1.OnedriveLinksStatus + var output map[string]v1.OneDriveLinksConnectorStatus if err = json.NewDecoder(file).Decode(&output); err != nil { return fmt.Errorf("failed to decode metadata file: %w", err) } @@ -161,20 +180,16 @@ func (u *UploadHandler) HandleUploadRun(req router.Request, _ router.Response) e return fmt.Errorf("failed to close metadata file: %w", err) } - // Put the metadata file in the agent knowledge workspace - fileMetadata := make(map[string]any, len(output["output"].Files)) - outputDir := filepath.Join(workspace.GetDir(status.KnowledgeWorkspaceID), oneDriveLinks.Name) - for _, v := range output["output"].Files { - fileRelPath, err := filepath.Rel(outputDir, v.FilePath) - if err != nil { - fileRelPath = v.FilePath - } + fileMetadata, knowledgeFileNamesFromOutput, err := compileKnowledgeFilesFromOneDriveConnector(req.Ctx, req.Client, oneDriveLinks, output["output"].Files, status) + if err != nil { + return err + } - fileMetadata[fileRelPath] = map[string]any{ - "source": v.URL, - } + if err = deleteKnowledgeFilesNotIncluded(req.Ctx, req.Client, oneDriveLinks.Name, parentObj, knowledgeFileNamesFromOutput); err != nil { + return err } + // Put the metadata file in the agent knowledge workspace writer, err := u.workspaceClient.WriteFile(req.Ctx, status.KnowledgeWorkspaceID, filepath.Join(oneDriveLinks.Name, ".knowledge.json")) if err != nil { return fmt.Errorf("failed to create metadata file: %w", err) @@ -188,16 +203,13 @@ func (u *UploadHandler) HandleUploadRun(req router.Request, _ router.Response) e // Reset the agent knowledge generation so that ingestion kicks off again. status.KnowledgeGeneration++ - status.HasKnowledge = true if err = req.Client.Status().Update(req.Ctx, parentObj); err != nil { return fmt.Errorf("failed to update agent observed knowledge generation: %w", err) } - outputStatus := output["output"] - oneDriveLinks.Status.Error = outputStatus.Error - oneDriveLinks.Status.Status = outputStatus.Status - oneDriveLinks.Status.Files = outputStatus.Files - oneDriveLinks.Status.Folders = outputStatus.Folders + oneDriveLinks.Status.Error = output["output"].Error + oneDriveLinks.Status.Status = output["output"].Status + oneDriveLinks.Status.Folders = output["output"].Folders // Reset thread name and observed generation so future ingests will create a new thread oneDriveLinks.Status.ThreadName = "" @@ -278,7 +290,7 @@ func (u *UploadHandler) GC(req router.Request, _ router.Response) error { return nil } -func parentObjAndStatus(req router.Request, onedriveLinks *v1.OneDriveLinks) (client.Object, *v1.KnowledgeWorkspaceStatus, error) { +func parentObjAndStatus(req router.Request, onedriveLinks *v1.OneDriveLinks) (knowledge.Knowledgeable, *v1.KnowledgeWorkspaceStatus, error) { if onedriveLinks.Spec.AgentName != "" { var agent v1.Agent if err := req.Get(&agent, onedriveLinks.Namespace, onedriveLinks.Spec.AgentName); err != nil { @@ -297,3 +309,121 @@ func parentObjAndStatus(req router.Request, onedriveLinks *v1.OneDriveLinks) (cl return nil, nil, fmt.Errorf("no parent object found for onedrive link %q", onedriveLinks.Name) } + +func knowledgeFilesForParent(req router.Request, parentObj knowledge.Knowledgeable) (v1.KnowledgeFileList, error) { + var files v1.KnowledgeFileList + return files, req.List(&files, &client.ListOptions{ + FieldSelector: fieldSelectorForFiles(parentObj), + Namespace: parentObj.GetNamespace(), + }) +} + +func compileKnowledgeFilesFromOneDriveConnector(ctx context.Context, c client.Client, oneDriveLinks *v1.OneDriveLinks, files map[string]v1.FileDetails, status *v1.KnowledgeWorkspaceStatus) (map[string]any, map[string]struct{}, error) { + var ( + errs []error + // fileMetadata is the metadata for the knowledge tool, translated from the connector output. + fileMetadata = make(map[string]any, len(files)) + outputDir = workspace.GetDir(status.KnowledgeWorkspaceID) + knowledgeFileNamesFromOutput = make(map[string]struct{}, len(files)) + ) + for id, v := range files { + fileRelPath, err := filepath.Rel(outputDir, v.FilePath) + if err != nil { + fileRelPath = v.FilePath + } + + fileMetadata[fileRelPath] = map[string]any{ + "source": v.URL, + } + + name := v1.ObjectNameFromAbsolutePath(v.FilePath) + knowledgeFileNamesFromOutput[name] = struct{}{} + newKnowledgeFile := &v1.KnowledgeFile{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: oneDriveLinks.Namespace, + }, + Spec: v1.KnowledgeFileSpec{ + AgentName: oneDriveLinks.Spec.AgentName, + WorkflowName: oneDriveLinks.Spec.WorkflowName, + FileName: fileRelPath, + UploadName: oneDriveLinks.Name, + }, + } + if err := c.Create(ctx, newKnowledgeFile); err == nil || apierrors.IsAlreadyExists(err) { + // If the file was created or already existed, ensure it has the latest details from the metadata. + if err := retry.OnError(retry.DefaultRetry, func(err error) bool { + return !apierrors.IsNotFound(err) + }, func() error { + if err := c.Get(ctx, router.Key(newKnowledgeFile.Namespace, newKnowledgeFile.Name), uncached.Get(newKnowledgeFile)); err != nil { + return err + } + + newKnowledgeFile.Status.FileDetails = v + newKnowledgeFile.Status.UploadID = id + return c.Status().Update(ctx, newKnowledgeFile) + }); err != nil { + errs = append(errs, fmt.Errorf("failed to update knowledge file %q status: %w", newKnowledgeFile.Name, err)) + } + } else if err != nil { + errs = append(errs, err) + } + + status.HasKnowledge = true + } + + if len(errs) > 0 { + return nil, nil, fmt.Errorf("failed to create knowledge files: %w", errors.Join(errs...)) + } + + return fileMetadata, knowledgeFileNamesFromOutput, nil +} + +func deleteKnowledgeFilesNotIncluded(ctx context.Context, c client.Client, uploadName string, parentObj knowledge.Knowledgeable, filenames map[string]struct{}) error { + var knowledgeFiles v1.KnowledgeFileList + if err := c.List(ctx, uncached.List(&knowledgeFiles), &client.ListOptions{ + Namespace: parentObj.GetNamespace(), + FieldSelector: fieldSelectorForFiles(parentObj), + }); err != nil { + return fmt.Errorf("failed to list knowledge files: %w", err) + } + + var errs []error + for _, knowledgeFile := range knowledgeFiles.Items { + if _, exists := filenames[knowledgeFile.Name]; !exists && knowledgeFile.Spec.UploadName == uploadName { + if err := c.Delete(ctx, &knowledgeFile); err != nil { + errs = append(errs, fmt.Errorf("failed to delete knowledge file %q: %w", knowledgeFile.Name, err)) + } + } + } + + return errors.Join(errs...) +} + +func fieldSelectorForFiles(parentObj knowledge.Knowledgeable) fields.Selector { + fieldSelector := make(map[string]string) + if threadName := parentObj.ThreadName(); threadName != "" { + fieldSelector["spec.threadName"] = threadName + } + + if agentName := parentObj.AgentName(); agentName != "" && len(fieldSelector) == 0 { + fieldSelector["spec.agentName"] = agentName + } + + if workflowName := parentObj.WorkflowName(); workflowName != "" && len(fieldSelector) == 0 { + fieldSelector["spec.workflowName"] = workflowName + } + + return fields.SelectorFromSet(fieldSelector) +} + +func compileKnowledgeFilesForOneDriveConnector(files v1.KnowledgeFileList, uploadName string) map[string]v1.FileDetails { + knowledgeFileStatuses := make(map[string]v1.FileDetails, len(files.Items)) + for _, file := range files.Items { + if file.Spec.UploadName == uploadName { + knowledgeFileStatuses[file.Status.UploadID] = file.Status.FileDetails + } + } + + return knowledgeFileStatuses +} diff --git a/pkg/controller/handlers/workspace/workspace.go b/pkg/controller/handlers/workspace/workspace.go index 79f3f954f..1f78e6f38 100644 --- a/pkg/controller/handlers/workspace/workspace.go +++ b/pkg/controller/handlers/workspace/workspace.go @@ -21,12 +21,12 @@ func New(wc *wclient.Client, wp string) *Handler { type workspaceable interface { kclient.Object - GetWorkspaceStatus() *v1.WorkspaceStatus + WorkspaceStatus() *v1.WorkspaceStatus } -func (a *Handler) CreateWorkspace(req router.Request, resp router.Response) error { +func (a *Handler) CreateWorkspace(req router.Request, _ router.Response) error { workspaced := req.Object.(workspaceable) - status := workspaced.GetWorkspaceStatus() + status := workspaced.WorkspaceStatus() if status.WorkspaceID != "" { return nil } @@ -46,15 +46,14 @@ func (a *Handler) CreateWorkspace(req router.Request, resp router.Response) erro return nil } -func (a *Handler) RemoveWorkspace(req router.Request, resp router.Response) error { +func (a *Handler) RemoveWorkspace(req router.Request, _ router.Response) error { workspaced := req.Object.(workspaceable) - status := workspaced.GetWorkspaceStatus() + status := workspaced.WorkspaceStatus() if status.WorkspaceID != "" { if err := a.workspaceClient.Rm(req.Ctx, status.WorkspaceID); err != nil { return err } } - status.WorkspaceID = "" return nil } diff --git a/pkg/controller/routes.go b/pkg/controller/routes.go index 7ff2a9877..a4cf00c7e 100644 --- a/pkg/controller/routes.go +++ b/pkg/controller/routes.go @@ -36,15 +36,16 @@ func routes(router *router.Router, svcs *services.Services) error { root.Type(&v1.Run{}).HandlerFunc(runs.Resume) // Threads - root.Type(&v1.Thread{}).FinalizeFunc(v1.ThreadFinalizer, knowledge.RemoveWorkspace) - root.Type(&v1.Thread{}).FinalizeFunc(v1.ThreadFinalizer, workspace.RemoveWorkspace) + root.Type(&v1.Thread{}).FinalizeFunc(v1.ThreadWorkspaceFinalizer, knowledge.RemoveWorkspace) + root.Type(&v1.Thread{}).FinalizeFunc(v1.ThreadKnowledgeFinalizer, workspace.RemoveWorkspace) + root.Type(&v1.Thread{}).HandlerFunc(threads.MoveWorkspacesToStatus) root.Type(&v1.Thread{}).HandlerFunc(threads.Cleanup) root.Type(&v1.Thread{}).HandlerFunc(threads.Description) root.Type(&v1.Thread{}).Middleware(threads.HasKnowledge).HandlerFunc(knowledge.IngestKnowledge) // Workflows - root.Type(&v1.Workflow{}).FinalizeFunc(v1.WorkflowFinalizer, workspace.RemoveWorkspace) - root.Type(&v1.Workflow{}).FinalizeFunc(v1.WorkflowFinalizer, knowledge.RemoveWorkspace) + root.Type(&v1.Workflow{}).FinalizeFunc(v1.WorkflowWorkspaceFinalizer, workspace.RemoveWorkspace) + root.Type(&v1.Workflow{}).FinalizeFunc(v1.WorkflowKnowledgeFinalizer, knowledge.RemoveWorkspace) root.Type(&v1.Workflow{}).HandlerFunc(workspace.CreateWorkspace) root.Type(&v1.Workflow{}).HandlerFunc(knowledge.CreateWorkspace) @@ -64,8 +65,8 @@ func routes(router *router.Router, svcs *services.Services) error { running.HandlerFunc(workflowStep.RunSubflow) // Agents - root.Type(&v1.Agent{}).FinalizeFunc(v1.AgentFinalizer, workspace.RemoveWorkspace) - root.Type(&v1.Agent{}).FinalizeFunc(v1.AgentFinalizer, knowledge.RemoveWorkspace) + root.Type(&v1.Agent{}).FinalizeFunc(v1.AgentWorkspaceFinalizer, workspace.RemoveWorkspace) + root.Type(&v1.Agent{}).FinalizeFunc(v1.AgentKnowledgeFinalizer, knowledge.RemoveWorkspace) root.Type(&v1.Agent{}).HandlerFunc(agents.Suggestion) root.Type(&v1.Agent{}).HandlerFunc(workspace.CreateWorkspace) root.Type(&v1.Agent{}).HandlerFunc(knowledge.CreateWorkspace) @@ -86,5 +87,9 @@ func routes(router *router.Router, svcs *services.Services) error { // Workflows root.Type(&v1.Workflow{}).HandlerFunc(workflow.EnsureIDs) + // Knowledge files + root.Type(&v1.KnowledgeFile{}).HandlerFunc(knowledge.GCFile) + root.Type(&v1.KnowledgeFile{}).FinalizeFunc(v1.KnowledgeFileFinalizer, knowledge.CleanupFile) + return nil } diff --git a/pkg/invoke/invoker.go b/pkg/invoke/invoker.go index f6af80d2f..62fd9b2be 100644 --- a/pkg/invoke/invoker.go +++ b/pkg/invoke/invoker.go @@ -134,7 +134,6 @@ func (i *Invoker) NewThread(ctx context.Context, c kclient.Client, namespace str Name: createName, GenerateName: system.ThreadPrefix, Namespace: namespace, - Finalizers: []string{v1.ThreadFinalizer}, }, Spec: v1.ThreadSpec{ AgentName: opt.AgentName, @@ -426,8 +425,9 @@ func (i *Invoker) Resume(ctx context.Context, c kclient.Client, thread *v1.Threa } func (i *Invoker) saveState(ctx context.Context, c kclient.Client, thread *v1.Thread, run *v1.Run, runResp *gptscript.Run, retErr error) error { + var err error for j := 0; j < 3; j++ { - err := i.doSaveState(ctx, c, thread, run, runResp, retErr) + err = i.doSaveState(ctx, c, thread, run, runResp, retErr) if err == nil { return retErr } @@ -435,15 +435,15 @@ func (i *Invoker) saveState(ctx context.Context, c kclient.Client, thread *v1.Th return errors.Join(err, retErr) } // reload - if err := c.Get(ctx, router.Key(run.Namespace, run.Name), run); err != nil { + if err = c.Get(ctx, router.Key(run.Namespace, run.Name), run); err != nil { return errors.Join(err, retErr) } - if err := c.Get(ctx, router.Key(thread.Namespace, thread.Name), thread); err != nil { + if err = c.Get(ctx, router.Key(thread.Namespace, thread.Name), thread); err != nil { return errors.Join(err, retErr) } time.Sleep(500 * time.Millisecond) } - return fmt.Errorf("failed to save state after 3 retries: %w", retErr) + return fmt.Errorf("failed to save state after 3 retries: %w", errors.Join(err, retErr)) } func (i *Invoker) doSaveState(ctx context.Context, c kclient.Client, thread *v1.Thread, run *v1.Run, runResp *gptscript.Run, retErr error) error { diff --git a/pkg/knowledge/knowledge.go b/pkg/knowledge/knowledge.go index d3b8b6d54..c46b804d7 100644 --- a/pkg/knowledge/knowledge.go +++ b/pkg/knowledge/knowledge.go @@ -13,7 +13,10 @@ import ( type Knowledgeable interface { client.Object - GetKnowledgeWorkspaceStatus() *v1.KnowledgeWorkspaceStatus + KnowledgeWorkspaceStatus() *v1.KnowledgeWorkspaceStatus + AgentName() string + WorkflowName() string + ThreadName() string } type Ingester struct { @@ -28,54 +31,36 @@ func NewIngester(invoker *invoke.Invoker, knowledgeTool string) *Ingester { } } -func (i *Ingester) IngestKnowledge(ctx context.Context, agentName, namespace, knowledgeWorkspaceID string) error { - knowledgeTool, tag, ok := strings.Cut(i.knowledgeTool, "@") - if ok { - tag = "@" + tag - } - - run, err := i.invoker.SystemAction( +func (i *Ingester) IngestKnowledge(ctx context.Context, agentName, namespace, knowledgeWorkspaceID string) (*invoke.Response, error) { + return i.invoker.SystemAction( ctx, "ingest-", agentName, namespace, - knowledgeTool+"/ingest.gpt"+tag, + fullKnowledgeTool(i.knowledgeTool, "ingest.gpt"), workspace.GetDir(knowledgeWorkspaceID), - // These are environment variables passed to the script + // Below are environment variables used by the ingest tool "GPTSCRIPT_DATASET="+workspace.KnowledgeIDFromWorkspaceID(knowledgeWorkspaceID), + "KNOW_JSON=true", ) - if err != nil { - return err - } - - run.Wait() - if run.Run.Status.Error != "" { - return fmt.Errorf("failed to ingest knowledge: %s", run.Run.Status.Error) - } - return nil } -func (i *Ingester) DeleteKnowledge(ctx context.Context, agentName, namespace, knowledgeWorkspaceID string) error { - knowledgeTool, tag, ok := strings.Cut(i.knowledgeTool, "@") - if ok { - tag = "@" + tag - } - - run, err := i.invoker.SystemAction( +func (i *Ingester) DeleteKnowledge(ctx context.Context, agentName, namespace, knowledgeWorkspaceID string) (*invoke.Response, error) { + return i.invoker.SystemAction( ctx, "ingest-delete-", agentName, namespace, - knowledgeTool+"/delete.gpt"+tag, + fullKnowledgeTool(i.knowledgeTool, "delete.gpt"), workspace.KnowledgeIDFromWorkspaceID(knowledgeWorkspaceID), ) - if err != nil { - return err - } +} - run.Wait() - if run.Run.Status.Error != "" { - return fmt.Errorf("failed to delete knowledge: %s", run.Run.Status.Error) +func fullKnowledgeTool(knowledgeTool, subTool string) string { + knowledgeTool, tag, ok := strings.Cut(knowledgeTool, "@") + if ok { + tag = "@" + tag } - return nil + + return fmt.Sprintf("%s/%s%s", knowledgeTool, subTool, tag) } diff --git a/pkg/render/render.go b/pkg/render/render.go index 369dc7438..7a3624b94 100644 --- a/pkg/render/render.go +++ b/pkg/render/render.go @@ -21,13 +21,13 @@ type AgentOptions struct { } func agentKnowledgeEnv(agent *v1.Agent, thread *v1.Thread) (envs []string) { - if agent.GetKnowledgeWorkspaceStatus().HasKnowledge { + if agent.KnowledgeWorkspaceStatus().HasKnowledge { envs = append(envs, - fmt.Sprintf("GPTSCRIPT_SCRIPT_ID=%s", workspace.KnowledgeIDFromWorkspaceID(agent.GetKnowledgeWorkspaceStatus().KnowledgeWorkspaceID)), + fmt.Sprintf("GPTSCRIPT_SCRIPT_ID=%s", workspace.KnowledgeIDFromWorkspaceID(agent.KnowledgeWorkspaceStatus().KnowledgeWorkspaceID)), ) - if thread != nil && thread.GetKnowledgeWorkspaceStatus().HasKnowledge { + if thread != nil && thread.KnowledgeWorkspaceStatus().HasKnowledge { envs = append(envs, - fmt.Sprintf("GPTSCRIPT_THREAD_ID=%s", workspace.KnowledgeIDFromWorkspaceID(thread.GetKnowledgeWorkspaceStatus().KnowledgeWorkspaceID)), + fmt.Sprintf("GPTSCRIPT_THREAD_ID=%s", workspace.KnowledgeIDFromWorkspaceID(thread.KnowledgeWorkspaceStatus().KnowledgeWorkspaceID)), ) } } diff --git a/pkg/storage/apis/otto.gptscript.ai/v1/agent.go b/pkg/storage/apis/otto.gptscript.ai/v1/agent.go index 8fbfd6104..f665d6256 100644 --- a/pkg/storage/apis/otto.gptscript.ai/v1/agent.go +++ b/pkg/storage/apis/otto.gptscript.ai/v1/agent.go @@ -19,11 +19,23 @@ type Agent struct { Status AgentStatus `json:"status,omitempty"` } -func (in *Agent) GetKnowledgeWorkspaceStatus() *KnowledgeWorkspaceStatus { +func (in *Agent) AgentName() string { + return in.Name +} + +func (in *Agent) WorkflowName() string { + return "" +} + +func (in *Agent) ThreadName() string { + return "" +} + +func (in *Agent) KnowledgeWorkspaceStatus() *KnowledgeWorkspaceStatus { return &in.Status.KnowledgeWorkspace } -func (in *Agent) GetWorkspaceStatus() *WorkspaceStatus { +func (in *Agent) WorkspaceStatus() *WorkspaceStatus { return &in.Status.Workspace } @@ -58,6 +70,7 @@ type KnowledgeWorkspaceStatus struct { KnowledgeGeneration int64 `json:"knowledgeGeneration,omitempty"` ObservedKnowledgeGeneration int64 `json:"observedKnowledgeGeneration,omitempty"` KnowledgeWorkspaceID string `json:"knowledgeWorkspaceID,omitempty"` + IngestionRunName string `json:"ingestionRunName,omitempty"` } type AgentExternalStatus struct { diff --git a/pkg/storage/apis/otto.gptscript.ai/v1/file.go b/pkg/storage/apis/otto.gptscript.ai/v1/file.go new file mode 100644 index 000000000..fad4ec508 --- /dev/null +++ b/pkg/storage/apis/otto.gptscript.ai/v1/file.go @@ -0,0 +1,94 @@ +package v1 + +import ( + "crypto/sha256" + "fmt" + + "github.com/acorn-io/baaah/pkg/fields" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object + +type KnowledgeFile struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec KnowledgeFileSpec `json:"spec,omitempty"` + Status KnowledgeFileStatus `json:"status,omitempty"` +} + +func (k *KnowledgeFile) Has(field string) bool { + return k.Get(field) != "" +} + +func (k *KnowledgeFile) Get(field string) string { + if k == nil { + return "" + } + + switch field { + case "spec.agentName": + return k.Spec.AgentName + case "spec.workflowName": + return k.Spec.WorkflowName + case "spec.threadName": + return k.Spec.ThreadName + case "spec.uploadName": + return k.Spec.UploadName + } + + return "" +} + +func (*KnowledgeFile) FieldNames() []string { + return []string{"spec.agentName", "spec.workflowName", "spec.threadName", "spec.uploadName"} +} + +var _ fields.Fields = (*KnowledgeFile)(nil) + +type KnowledgeFileSpec struct { + FileName string `json:"fileName"` + AgentName string `json:"agentName,omitempty"` + WorkflowName string `json:"workflowName,omitempty"` + ThreadName string `json:"threadName,omitempty"` + UploadName string `json:"uploadName,omitempty"` +} + +type KnowledgeFileStatus struct { + IngestionStatus IngestionStatus `json:"ingestionStatus,omitempty"` + FileDetails FileDetails `json:"fileDetails,omitempty"` + UploadID string `json:"uploadID,omitempty"` +} + +type IngestionStatus struct { + Count int `json:"count,omitempty"` + Reason string `json:"reason,omitempty"` + AbsolutePath string `json:"absolute_path,omitempty"` + BasePath string `json:"basePath,omitempty"` + Filename string `json:"filename,omitempty"` + VectorStore string `json:"vectorstore,omitempty"` + Message string `json:"msg,omitempty"` + Flow string `json:"flow,omitempty"` + RootPath string `json:"rootPath,omitempty"` + Filepath string `json:"filepath,omitempty"` + Phase string `json:"phase,omitempty"` + NumDocuments int `json:"num_documents,omitempty"` + Stage string `json:"stage,omitempty"` + Status string `json:"status,omitempty"` + Component string `json:"component,omitempty"` + FileType string `json:"filetype,omitempty"` +} + +// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object + +type KnowledgeFileList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + + Items []KnowledgeFile `json:"items"` +} + +func ObjectNameFromAbsolutePath(absolutePath string) string { + return fmt.Sprintf("%x", sha256.Sum256([]byte(absolutePath))) +} diff --git a/pkg/storage/apis/otto.gptscript.ai/v1/run.go b/pkg/storage/apis/otto.gptscript.ai/v1/run.go index a793cc20f..6864673fe 100644 --- a/pkg/storage/apis/otto.gptscript.ai/v1/run.go +++ b/pkg/storage/apis/otto.gptscript.ai/v1/run.go @@ -11,11 +11,15 @@ var ( ) const ( - AgentFinalizer = "otto.gptscript.ai/agent" + AgentWorkspaceFinalizer = "otto.gptscript.ai/agent-workspace" + AgentKnowledgeFinalizer = "otto.gptscript.ai/agent-knowledge" RunFinalizer = "otto.gptscript.ai/run" - ThreadFinalizer = "otto.gptscript.ai/thread" + ThreadWorkspaceFinalizer = "otto.gptscript.ai/thread-workspace" + ThreadKnowledgeFinalizer = "otto.gptscript.ai/thread-knowledge" WorkflowExecutionFinalizer = "otto.gptscript.ai/workflow-execution" - WorkflowFinalizer = "otto.gptscript.ai/workflow" + WorkflowWorkspaceFinalizer = "otto.gptscript.ai/workflow-workspace" + WorkflowKnowledgeFinalizer = "otto.gptscript.ai/workflow-knowledge" + KnowledgeFileFinalizer = "otto.gptscript.ai/knowledge-file" ) const ( diff --git a/pkg/storage/apis/otto.gptscript.ai/v1/scheme.go b/pkg/storage/apis/otto.gptscript.ai/v1/scheme.go index 42eebe0c3..e5bc9e266 100644 --- a/pkg/storage/apis/otto.gptscript.ai/v1/scheme.go +++ b/pkg/storage/apis/otto.gptscript.ai/v1/scheme.go @@ -1,6 +1,7 @@ package v1 import ( + "github.com/acorn-io/baaah/pkg/fields" otto_gptscript_ai "github.com/gptscript-ai/otto/pkg/storage/apis/otto.gptscript.ai" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" @@ -19,7 +20,7 @@ func AddToScheme(scheme *runtime.Scheme) error { } func AddToSchemeWithGV(scheme *runtime.Scheme, schemeGroupVersion schema.GroupVersion) error { - scheme.AddKnownTypes(schemeGroupVersion, + if err := fields.AddKnownTypesWithFieldConversion(scheme, schemeGroupVersion, &Agent{}, &AgentList{}, &Run{}, @@ -38,7 +39,11 @@ func AddToSchemeWithGV(scheme *runtime.Scheme, schemeGroupVersion schema.GroupVe &WorkflowStepList{}, &OneDriveLinks{}, &OneDriveLinksList{}, - ) + &KnowledgeFile{}, + &KnowledgeFileList{}, + ); err != nil { + return err + } // Add common types scheme.AddKnownTypes(schemeGroupVersion, &metav1.Status{}) diff --git a/pkg/storage/apis/otto.gptscript.ai/v1/thread.go b/pkg/storage/apis/otto.gptscript.ai/v1/thread.go index c6fc08ca9..bd689afc6 100644 --- a/pkg/storage/apis/otto.gptscript.ai/v1/thread.go +++ b/pkg/storage/apis/otto.gptscript.ai/v1/thread.go @@ -20,19 +20,23 @@ type Thread struct { Status ThreadStatus `json:"status,omitempty"` } -func (in *Thread) GetKnowledgeWorkspaceStatus() *KnowledgeWorkspaceStatus { - // This is crazy hack and may cause issues in the future. So if it does, find a better way. That's your problem. - if in.Spec.KnowledgeWorkspaceID != "" && in.Status.KnowledgeWorkspace.KnowledgeWorkspaceID == "" { - in.Status.KnowledgeWorkspace.KnowledgeWorkspaceID = in.Spec.KnowledgeWorkspaceID - } +func (in *Thread) AgentName() string { + return in.Spec.AgentName +} + +func (in *Thread) WorkflowName() string { + return in.Spec.WorkflowName +} + +func (in *Thread) ThreadName() string { + return in.Name +} + +func (in *Thread) KnowledgeWorkspaceStatus() *KnowledgeWorkspaceStatus { return &in.Status.KnowledgeWorkspace } -func (in *Thread) GetWorkspaceStatus() *WorkspaceStatus { - if in.Spec.WorkspaceID != "" && in.Status.Workspace.WorkspaceID == "" { - in.Status.Workspace.WorkspaceID = in.Spec.WorkspaceID - } - // This is crazy hack and may cause issues in the future. So if it does, find a better way. That's your problem. +func (in *Thread) WorkspaceStatus() *WorkspaceStatus { return &in.Status.Workspace } diff --git a/pkg/storage/apis/otto.gptscript.ai/v1/upload.go b/pkg/storage/apis/otto.gptscript.ai/v1/upload.go index fce6b1442..ddb69f064 100644 --- a/pkg/storage/apis/otto.gptscript.ai/v1/upload.go +++ b/pkg/storage/apis/otto.gptscript.ai/v1/upload.go @@ -35,14 +35,20 @@ type OnedriveLinksSpec struct { } type OnedriveLinksStatus struct { - Conditions []metav1.Condition `json:"conditions,omitempty"` - ThreadName string `json:"threadName,omitempty"` - RunName string `json:"runName,omitempty"` - ObservedGeneration int64 `json:"observedGeneration,omitempty"` - Status string `json:"output,omitempty"` - Error string `json:"error,omitempty"` - Files map[string]FileDetails `json:"files,omitempty"` - Folders FolderSet `json:"folders,omitempty"` + Conditions []metav1.Condition `json:"conditions,omitempty"` + ThreadName string `json:"threadName,omitempty"` + RunName string `json:"runName,omitempty"` + ObservedGeneration int64 `json:"observedGeneration,omitempty"` + Status string `json:"output,omitempty"` + Error string `json:"error,omitempty"` + Folders FolderSet `json:"folders,omitempty"` +} + +type OneDriveLinksConnectorStatus struct { + Status string `json:"output,omitempty"` + Error string `json:"error,omitempty"` + Files map[string]FileDetails `json:"files,omitempty"` + Folders FolderSet `json:"folders,omitempty"` } type FileDetails struct { diff --git a/pkg/storage/apis/otto.gptscript.ai/v1/workflow.go b/pkg/storage/apis/otto.gptscript.ai/v1/workflow.go index 3963db416..df6af912f 100644 --- a/pkg/storage/apis/otto.gptscript.ai/v1/workflow.go +++ b/pkg/storage/apis/otto.gptscript.ai/v1/workflow.go @@ -19,11 +19,23 @@ type Workflow struct { Status WorkflowStatus `json:"status,omitempty"` } -func (in *Workflow) GetKnowledgeWorkspaceStatus() *KnowledgeWorkspaceStatus { +func (in *Workflow) AgentName() string { + return "" +} + +func (in *Workflow) WorkflowName() string { + return in.Name +} + +func (in *Workflow) ThreadName() string { + return "" +} + +func (in *Workflow) KnowledgeWorkspaceStatus() *KnowledgeWorkspaceStatus { return &in.Status.KnowledgeWorkspace } -func (in *Workflow) GetWorkspaceStatus() *WorkspaceStatus { +func (in *Workflow) WorkspaceStatus() *WorkspaceStatus { return &in.Status.Workspace } diff --git a/pkg/storage/apis/otto.gptscript.ai/v1/zz_generated.deepcopy.go b/pkg/storage/apis/otto.gptscript.ai/v1/zz_generated.deepcopy.go index bba0d4a2f..6735aed8e 100644 --- a/pkg/storage/apis/otto.gptscript.ai/v1/zz_generated.deepcopy.go +++ b/pkg/storage/apis/otto.gptscript.ai/v1/zz_generated.deepcopy.go @@ -237,6 +237,21 @@ func (in *If) DeepCopy() *If { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *IngestionStatus) DeepCopyInto(out *IngestionStatus) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new IngestionStatus. +func (in *IngestionStatus) DeepCopy() *IngestionStatus { + if in == nil { + return nil + } + out := new(IngestionStatus) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *Item) DeepCopyInto(out *Item) { *out = *in @@ -252,6 +267,97 @@ func (in *Item) DeepCopy() *Item { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *KnowledgeFile) DeepCopyInto(out *KnowledgeFile) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + out.Spec = in.Spec + out.Status = in.Status +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KnowledgeFile. +func (in *KnowledgeFile) DeepCopy() *KnowledgeFile { + if in == nil { + return nil + } + out := new(KnowledgeFile) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *KnowledgeFile) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *KnowledgeFileList) DeepCopyInto(out *KnowledgeFileList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]KnowledgeFile, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KnowledgeFileList. +func (in *KnowledgeFileList) DeepCopy() *KnowledgeFileList { + if in == nil { + return nil + } + out := new(KnowledgeFileList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *KnowledgeFileList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *KnowledgeFileSpec) DeepCopyInto(out *KnowledgeFileSpec) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KnowledgeFileSpec. +func (in *KnowledgeFileSpec) DeepCopy() *KnowledgeFileSpec { + if in == nil { + return nil + } + out := new(KnowledgeFileSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *KnowledgeFileStatus) DeepCopyInto(out *KnowledgeFileStatus) { + *out = *in + out.IngestionStatus = in.IngestionStatus + out.FileDetails = in.FileDetails +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KnowledgeFileStatus. +func (in *KnowledgeFileStatus) DeepCopy() *KnowledgeFileStatus { + if in == nil { + return nil + } + out := new(KnowledgeFileStatus) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *KnowledgeWorkspaceStatus) DeepCopyInto(out *KnowledgeWorkspaceStatus) { *out = *in @@ -294,6 +400,35 @@ func (in *OneDriveLinks) DeepCopyObject() runtime.Object { return nil } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *OneDriveLinksConnectorStatus) DeepCopyInto(out *OneDriveLinksConnectorStatus) { + *out = *in + if in.Files != nil { + in, out := &in.Files, &out.Files + *out = make(map[string]FileDetails, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } + if in.Folders != nil { + in, out := &in.Folders, &out.Folders + *out = make(FolderSet, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new OneDriveLinksConnectorStatus. +func (in *OneDriveLinksConnectorStatus) DeepCopy() *OneDriveLinksConnectorStatus { + if in == nil { + return nil + } + out := new(OneDriveLinksConnectorStatus) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *OneDriveLinksList) DeepCopyInto(out *OneDriveLinksList) { *out = *in @@ -356,13 +491,6 @@ func (in *OnedriveLinksStatus) DeepCopyInto(out *OnedriveLinksStatus) { (*in)[i].DeepCopyInto(&(*out)[i]) } } - if in.Files != nil { - in, out := &in.Files, &out.Files - *out = make(map[string]FileDetails, len(*in)) - for key, val := range *in { - (*out)[key] = val - } - } if in.Folders != nil { in, out := &in.Folders, &out.Folders *out = make(FolderSet, len(*in)) diff --git a/pkg/storage/openapi/generated/openapi_generated.go b/pkg/storage/openapi/generated/openapi_generated.go index 609de2a18..7320f3fab 100644 --- a/pkg/storage/openapi/generated/openapi_generated.go +++ b/pkg/storage/openapi/generated/openapi_generated.go @@ -25,9 +25,15 @@ func GetOpenAPIDefinitions(ref common.ReferenceCallback) map[string]common.OpenA "github.com/gptscript-ai/otto/pkg/storage/apis/otto.gptscript.ai/v1.AgentStatus": schema_storage_apis_ottogptscriptai_v1_AgentStatus(ref), "github.com/gptscript-ai/otto/pkg/storage/apis/otto.gptscript.ai/v1.FileDetails": schema_storage_apis_ottogptscriptai_v1_FileDetails(ref), "github.com/gptscript-ai/otto/pkg/storage/apis/otto.gptscript.ai/v1.If": schema_storage_apis_ottogptscriptai_v1_If(ref), + "github.com/gptscript-ai/otto/pkg/storage/apis/otto.gptscript.ai/v1.IngestionStatus": schema_storage_apis_ottogptscriptai_v1_IngestionStatus(ref), "github.com/gptscript-ai/otto/pkg/storage/apis/otto.gptscript.ai/v1.Item": schema_storage_apis_ottogptscriptai_v1_Item(ref), + "github.com/gptscript-ai/otto/pkg/storage/apis/otto.gptscript.ai/v1.KnowledgeFile": schema_storage_apis_ottogptscriptai_v1_KnowledgeFile(ref), + "github.com/gptscript-ai/otto/pkg/storage/apis/otto.gptscript.ai/v1.KnowledgeFileList": schema_storage_apis_ottogptscriptai_v1_KnowledgeFileList(ref), + "github.com/gptscript-ai/otto/pkg/storage/apis/otto.gptscript.ai/v1.KnowledgeFileSpec": schema_storage_apis_ottogptscriptai_v1_KnowledgeFileSpec(ref), + "github.com/gptscript-ai/otto/pkg/storage/apis/otto.gptscript.ai/v1.KnowledgeFileStatus": schema_storage_apis_ottogptscriptai_v1_KnowledgeFileStatus(ref), "github.com/gptscript-ai/otto/pkg/storage/apis/otto.gptscript.ai/v1.KnowledgeWorkspaceStatus": schema_storage_apis_ottogptscriptai_v1_KnowledgeWorkspaceStatus(ref), "github.com/gptscript-ai/otto/pkg/storage/apis/otto.gptscript.ai/v1.OneDriveLinks": schema_storage_apis_ottogptscriptai_v1_OneDriveLinks(ref), + "github.com/gptscript-ai/otto/pkg/storage/apis/otto.gptscript.ai/v1.OneDriveLinksConnectorStatus": schema_storage_apis_ottogptscriptai_v1_OneDriveLinksConnectorStatus(ref), "github.com/gptscript-ai/otto/pkg/storage/apis/otto.gptscript.ai/v1.OneDriveLinksList": schema_storage_apis_ottogptscriptai_v1_OneDriveLinksList(ref), "github.com/gptscript-ai/otto/pkg/storage/apis/otto.gptscript.ai/v1.OnedriveLinksSpec": schema_storage_apis_ottogptscriptai_v1_OnedriveLinksSpec(ref), "github.com/gptscript-ai/otto/pkg/storage/apis/otto.gptscript.ai/v1.OnedriveLinksStatus": schema_storage_apis_ottogptscriptai_v1_OnedriveLinksStatus(ref), @@ -512,6 +518,114 @@ func schema_storage_apis_ottogptscriptai_v1_If(ref common.ReferenceCallback) com } } +func schema_storage_apis_ottogptscriptai_v1_IngestionStatus(ref common.ReferenceCallback) common.OpenAPIDefinition { + return common.OpenAPIDefinition{ + Schema: spec.Schema{ + SchemaProps: spec.SchemaProps{ + Type: []string{"object"}, + Properties: map[string]spec.Schema{ + "count": { + SchemaProps: spec.SchemaProps{ + Type: []string{"integer"}, + Format: "int32", + }, + }, + "reason": { + SchemaProps: spec.SchemaProps{ + Type: []string{"string"}, + Format: "", + }, + }, + "absolute_path": { + SchemaProps: spec.SchemaProps{ + Type: []string{"string"}, + Format: "", + }, + }, + "basePath": { + SchemaProps: spec.SchemaProps{ + Type: []string{"string"}, + Format: "", + }, + }, + "filename": { + SchemaProps: spec.SchemaProps{ + Type: []string{"string"}, + Format: "", + }, + }, + "vectorstore": { + SchemaProps: spec.SchemaProps{ + Type: []string{"string"}, + Format: "", + }, + }, + "msg": { + SchemaProps: spec.SchemaProps{ + Type: []string{"string"}, + Format: "", + }, + }, + "flow": { + SchemaProps: spec.SchemaProps{ + Type: []string{"string"}, + Format: "", + }, + }, + "rootPath": { + SchemaProps: spec.SchemaProps{ + Type: []string{"string"}, + Format: "", + }, + }, + "filepath": { + SchemaProps: spec.SchemaProps{ + Type: []string{"string"}, + Format: "", + }, + }, + "phase": { + SchemaProps: spec.SchemaProps{ + Type: []string{"string"}, + Format: "", + }, + }, + "num_documents": { + SchemaProps: spec.SchemaProps{ + Type: []string{"integer"}, + Format: "int32", + }, + }, + "stage": { + SchemaProps: spec.SchemaProps{ + Type: []string{"string"}, + Format: "", + }, + }, + "status": { + SchemaProps: spec.SchemaProps{ + Type: []string{"string"}, + Format: "", + }, + }, + "component": { + SchemaProps: spec.SchemaProps{ + Type: []string{"string"}, + Format: "", + }, + }, + "filetype": { + SchemaProps: spec.SchemaProps{ + Type: []string{"string"}, + Format: "", + }, + }, + }, + }, + }, + } +} + func schema_storage_apis_ottogptscriptai_v1_Item(ref common.ReferenceCallback) common.OpenAPIDefinition { return common.OpenAPIDefinition{ Schema: spec.Schema{ @@ -522,6 +636,176 @@ func schema_storage_apis_ottogptscriptai_v1_Item(ref common.ReferenceCallback) c } } +func schema_storage_apis_ottogptscriptai_v1_KnowledgeFile(ref common.ReferenceCallback) common.OpenAPIDefinition { + return common.OpenAPIDefinition{ + Schema: spec.Schema{ + SchemaProps: spec.SchemaProps{ + Type: []string{"object"}, + Properties: map[string]spec.Schema{ + "kind": { + SchemaProps: spec.SchemaProps{ + Description: "Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds", + Type: []string{"string"}, + Format: "", + }, + }, + "apiVersion": { + SchemaProps: spec.SchemaProps{ + Description: "APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources", + Type: []string{"string"}, + Format: "", + }, + }, + "metadata": { + SchemaProps: spec.SchemaProps{ + Default: map[string]interface{}{}, + Ref: ref("k8s.io/apimachinery/pkg/apis/meta/v1.ObjectMeta"), + }, + }, + "spec": { + SchemaProps: spec.SchemaProps{ + Default: map[string]interface{}{}, + Ref: ref("github.com/gptscript-ai/otto/pkg/storage/apis/otto.gptscript.ai/v1.KnowledgeFileSpec"), + }, + }, + "status": { + SchemaProps: spec.SchemaProps{ + Default: map[string]interface{}{}, + Ref: ref("github.com/gptscript-ai/otto/pkg/storage/apis/otto.gptscript.ai/v1.KnowledgeFileStatus"), + }, + }, + }, + }, + }, + Dependencies: []string{ + "github.com/gptscript-ai/otto/pkg/storage/apis/otto.gptscript.ai/v1.KnowledgeFileSpec", "github.com/gptscript-ai/otto/pkg/storage/apis/otto.gptscript.ai/v1.KnowledgeFileStatus", "k8s.io/apimachinery/pkg/apis/meta/v1.ObjectMeta"}, + } +} + +func schema_storage_apis_ottogptscriptai_v1_KnowledgeFileList(ref common.ReferenceCallback) common.OpenAPIDefinition { + return common.OpenAPIDefinition{ + Schema: spec.Schema{ + SchemaProps: spec.SchemaProps{ + Type: []string{"object"}, + Properties: map[string]spec.Schema{ + "kind": { + SchemaProps: spec.SchemaProps{ + Description: "Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds", + Type: []string{"string"}, + Format: "", + }, + }, + "apiVersion": { + SchemaProps: spec.SchemaProps{ + Description: "APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources", + Type: []string{"string"}, + Format: "", + }, + }, + "metadata": { + SchemaProps: spec.SchemaProps{ + Default: map[string]interface{}{}, + Ref: ref("k8s.io/apimachinery/pkg/apis/meta/v1.ListMeta"), + }, + }, + "items": { + SchemaProps: spec.SchemaProps{ + Type: []string{"array"}, + Items: &spec.SchemaOrArray{ + Schema: &spec.Schema{ + SchemaProps: spec.SchemaProps{ + Default: map[string]interface{}{}, + Ref: ref("github.com/gptscript-ai/otto/pkg/storage/apis/otto.gptscript.ai/v1.KnowledgeFile"), + }, + }, + }, + }, + }, + }, + Required: []string{"items"}, + }, + }, + Dependencies: []string{ + "github.com/gptscript-ai/otto/pkg/storage/apis/otto.gptscript.ai/v1.KnowledgeFile", "k8s.io/apimachinery/pkg/apis/meta/v1.ListMeta"}, + } +} + +func schema_storage_apis_ottogptscriptai_v1_KnowledgeFileSpec(ref common.ReferenceCallback) common.OpenAPIDefinition { + return common.OpenAPIDefinition{ + Schema: spec.Schema{ + SchemaProps: spec.SchemaProps{ + Type: []string{"object"}, + Properties: map[string]spec.Schema{ + "fileName": { + SchemaProps: spec.SchemaProps{ + Default: "", + Type: []string{"string"}, + Format: "", + }, + }, + "agentName": { + SchemaProps: spec.SchemaProps{ + Type: []string{"string"}, + Format: "", + }, + }, + "workflowName": { + SchemaProps: spec.SchemaProps{ + Type: []string{"string"}, + Format: "", + }, + }, + "threadName": { + SchemaProps: spec.SchemaProps{ + Type: []string{"string"}, + Format: "", + }, + }, + "uploadName": { + SchemaProps: spec.SchemaProps{ + Type: []string{"string"}, + Format: "", + }, + }, + }, + Required: []string{"fileName"}, + }, + }, + } +} + +func schema_storage_apis_ottogptscriptai_v1_KnowledgeFileStatus(ref common.ReferenceCallback) common.OpenAPIDefinition { + return common.OpenAPIDefinition{ + Schema: spec.Schema{ + SchemaProps: spec.SchemaProps{ + Type: []string{"object"}, + Properties: map[string]spec.Schema{ + "ingestionStatus": { + SchemaProps: spec.SchemaProps{ + Default: map[string]interface{}{}, + Ref: ref("github.com/gptscript-ai/otto/pkg/storage/apis/otto.gptscript.ai/v1.IngestionStatus"), + }, + }, + "fileDetails": { + SchemaProps: spec.SchemaProps{ + Default: map[string]interface{}{}, + Ref: ref("github.com/gptscript-ai/otto/pkg/storage/apis/otto.gptscript.ai/v1.FileDetails"), + }, + }, + "uploadID": { + SchemaProps: spec.SchemaProps{ + Type: []string{"string"}, + Format: "", + }, + }, + }, + }, + }, + Dependencies: []string{ + "github.com/gptscript-ai/otto/pkg/storage/apis/otto.gptscript.ai/v1.FileDetails", "github.com/gptscript-ai/otto/pkg/storage/apis/otto.gptscript.ai/v1.IngestionStatus"}, + } +} + func schema_storage_apis_ottogptscriptai_v1_KnowledgeWorkspaceStatus(ref common.ReferenceCallback) common.OpenAPIDefinition { return common.OpenAPIDefinition{ Schema: spec.Schema{ @@ -552,6 +836,12 @@ func schema_storage_apis_ottogptscriptai_v1_KnowledgeWorkspaceStatus(ref common. Format: "", }, }, + "ingestionRunName": { + SchemaProps: spec.SchemaProps{ + Type: []string{"string"}, + Format: "", + }, + }, }, }, }, @@ -604,6 +894,60 @@ func schema_storage_apis_ottogptscriptai_v1_OneDriveLinks(ref common.ReferenceCa } } +func schema_storage_apis_ottogptscriptai_v1_OneDriveLinksConnectorStatus(ref common.ReferenceCallback) common.OpenAPIDefinition { + return common.OpenAPIDefinition{ + Schema: spec.Schema{ + SchemaProps: spec.SchemaProps{ + Type: []string{"object"}, + Properties: map[string]spec.Schema{ + "output": { + SchemaProps: spec.SchemaProps{ + Type: []string{"string"}, + Format: "", + }, + }, + "error": { + SchemaProps: spec.SchemaProps{ + Type: []string{"string"}, + Format: "", + }, + }, + "files": { + SchemaProps: spec.SchemaProps{ + Type: []string{"object"}, + AdditionalProperties: &spec.SchemaOrBool{ + Allows: true, + Schema: &spec.Schema{ + SchemaProps: spec.SchemaProps{ + Default: map[string]interface{}{}, + Ref: ref("github.com/gptscript-ai/otto/pkg/storage/apis/otto.gptscript.ai/v1.FileDetails"), + }, + }, + }, + }, + }, + "folders": { + SchemaProps: spec.SchemaProps{ + Type: []string{"object"}, + AdditionalProperties: &spec.SchemaOrBool{ + Allows: true, + Schema: &spec.Schema{ + SchemaProps: spec.SchemaProps{ + Default: map[string]interface{}{}, + Ref: ref("github.com/gptscript-ai/otto/pkg/storage/apis/otto.gptscript.ai/v1.Item"), + }, + }, + }, + }, + }, + }, + }, + }, + Dependencies: []string{ + "github.com/gptscript-ai/otto/pkg/storage/apis/otto.gptscript.ai/v1.FileDetails", "github.com/gptscript-ai/otto/pkg/storage/apis/otto.gptscript.ai/v1.Item"}, + } +} + func schema_storage_apis_ottogptscriptai_v1_OneDriveLinksList(ref common.ReferenceCallback) common.OpenAPIDefinition { return common.OpenAPIDefinition{ Schema: spec.Schema{ @@ -739,20 +1083,6 @@ func schema_storage_apis_ottogptscriptai_v1_OnedriveLinksStatus(ref common.Refer Format: "", }, }, - "files": { - SchemaProps: spec.SchemaProps{ - Type: []string{"object"}, - AdditionalProperties: &spec.SchemaOrBool{ - Allows: true, - Schema: &spec.Schema{ - SchemaProps: spec.SchemaProps{ - Default: map[string]interface{}{}, - Ref: ref("github.com/gptscript-ai/otto/pkg/storage/apis/otto.gptscript.ai/v1.FileDetails"), - }, - }, - }, - }, - }, "folders": { SchemaProps: spec.SchemaProps{ Type: []string{"object"}, @@ -771,7 +1101,7 @@ func schema_storage_apis_ottogptscriptai_v1_OnedriveLinksStatus(ref common.Refer }, }, Dependencies: []string{ - "github.com/gptscript-ai/otto/pkg/storage/apis/otto.gptscript.ai/v1.FileDetails", "github.com/gptscript-ai/otto/pkg/storage/apis/otto.gptscript.ai/v1.Item", "k8s.io/apimachinery/pkg/apis/meta/v1.Condition"}, + "github.com/gptscript-ai/otto/pkg/storage/apis/otto.gptscript.ai/v1.Item", "k8s.io/apimachinery/pkg/apis/meta/v1.Condition"}, } } diff --git a/pkg/storage/selectors/selectors.go b/pkg/storage/selectors/selectors.go new file mode 100644 index 000000000..2e3a50051 --- /dev/null +++ b/pkg/storage/selectors/selectors.go @@ -0,0 +1,10 @@ +package selectors + +func RemoveEmpty(selector map[string]string) map[string]string { + for k, v := range selector { + if v == "" { + delete(selector, k) + } + } + return selector +}