Skip to content

Commit

Permalink
Knowledge UX revamp
Browse files Browse the repository at this point in the history
Signed-off-by: Daishan Peng <[email protected]>
  • Loading branch information
StrongMonkey committed Oct 18, 2024
1 parent 33006ea commit 556f8b0
Show file tree
Hide file tree
Showing 35 changed files with 1,467 additions and 1,341 deletions.
2 changes: 2 additions & 0 deletions apiclient/types/knowledge.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,15 @@ type KnowledgeFile struct {
IngestionStatus IngestionStatus `json:"ingestionStatus,omitempty"`
FileDetails FileDetails `json:"fileDetails,omitempty"`
UploadID string `json:"uploadID,omitempty"`
Approved *bool `json:"approved,omitempty"`
}

// FileDetails carries per-file metadata for a knowledge file. Every field is
// omitted from the JSON encoding when it holds its zero value.
type FileDetails struct {
	// FilePath is the location of the file; callers in this change resolve it
	// relative to the workspace directory.
	FilePath string `json:"filePath,omitempty"`
	// URL is the file's URL, when it has one.
	URL string `json:"url,omitempty"`
	// UpdatedAt is the file's last-update marker, kept as an opaque string.
	UpdatedAt string `json:"updatedAt,omitempty"`
	// Checksum is a checksum string for the file.
	Checksum string `json:"checksum,omitempty"`
	// Ingested records whether the file's ingestion has already completed.
	Ingested bool `json:"ingested,omitempty"`
}

type IngestionStatus struct {
Expand Down
11 changes: 5 additions & 6 deletions apiclient/types/remoteKnowledgeSource.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,18 +21,17 @@ type RemoteKnowledgeSource struct {

// RemoteKnowledgeSourceManifest is the manifest of a remote knowledge source:
// its sync schedule, its auto-approval preference, and the embedded
// RemoteKnowledgeSourceInput.
type RemoteKnowledgeSourceManifest struct {
	// SyncSchedule is the schedule string for syncing the source.
	SyncSchedule string `json:"syncSchedule,omitempty"`
	// AutoApprove is optional. When it is non-nil and true, knowledge files
	// created from this source are marked as approved at creation time.
	AutoApprove *bool `json:"autoApprove,omitempty"`
	// RemoteKnowledgeSourceInput is embedded and tagged as inline.
	RemoteKnowledgeSourceInput `json:",inline"`
}

// RemoteKnowledgeSourceList is the List type instantiated for RemoteKnowledgeSource.
type RemoteKnowledgeSourceList List[RemoteKnowledgeSource]

// RemoteKnowledgeSourceInput holds the source type of a remote knowledge
// source together with the optional per-source-type configuration blocks.
//
// NOTE(review): this span looks like a rendered diff whose +/- markers were
// stripped: SourceType and the three *Config fields are each listed twice.
// Presumably the first six fields are the removed side of the hunk and the
// last four are the added side — confirm against the repository before
// treating this as compilable Go.
type RemoteKnowledgeSourceInput struct {
DisableIngestionAfterSync bool `json:"disableIngestionAfterSync,omitempty"`
SourceType RemoteKnowledgeSourceType `json:"sourceType,omitempty"`
Exclude []string `json:"exclude,omitempty"`
OneDriveConfig *OneDriveConfig `json:"onedriveConfig,omitempty"`
NotionConfig *NotionConfig `json:"notionConfig,omitempty"`
WebsiteCrawlingConfig *WebsiteCrawlingConfig `json:"websiteCrawlingConfig,omitempty"`
SourceType RemoteKnowledgeSourceType `json:"sourceType,omitempty"`
OneDriveConfig *OneDriveConfig `json:"onedriveConfig,omitempty"`
NotionConfig *NotionConfig `json:"notionConfig,omitempty"`
WebsiteCrawlingConfig *WebsiteCrawlingConfig `json:"websiteCrawlingConfig,omitempty"`
}

type OneDriveConfig struct {
Expand Down
15 changes: 10 additions & 5 deletions apiclient/types/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

21 changes: 21 additions & 0 deletions pkg/api/handlers/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,27 @@ func (a *AgentHandler) UploadKnowledge(req api.Context) error {
return uploadKnowledge(req, a.gptscript, agent.Status.KnowledgeSetNames...)
}

// ApproveKnowledgeFile stores the approval decision from the request body on a
// knowledge file.
//
// The body is expected to be JSON of the form {"approve": <bool>}. The file is
// looked up in the request's namespace by the "file_id" path value, its
// Spec.Approved is pointed at the decoded value, and the file is written back
// through req.Storage. Any error from reading the body, fetching the file, or
// updating it is returned unchanged.
func (a *AgentHandler) ApproveKnowledgeFile(req api.Context) error {
	var input struct {
		Approve bool `json:"approve"`
	}
	if err := req.Read(&input); err != nil {
		return err
	}

	var (
		file v1.KnowledgeFile
		key  = kclient.ObjectKey{
			Namespace: req.Namespace(),
			Name:      req.PathValue("file_id"),
		}
	)
	if err := req.Storage.Get(req.Context(), key, &file); err != nil {
		return err
	}

	// Spec.Approved is a *bool, so take the address of a local copy.
	approved := input.Approve
	file.Spec.Approved = &approved

	return req.Storage.Update(req.Context(), &file)
}

func (a *AgentHandler) DeleteKnowledge(req api.Context) error {
var agent v1.Agent
if err := req.Get(&agent, req.PathValue("id")); err != nil {
Expand Down
2 changes: 2 additions & 0 deletions pkg/api/handlers/files.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ func uploadKnowledgeToWorkspace(req api.Context, gClient *gptscript.GPTScript, w
Spec: v1.KnowledgeFileSpec{
FileName: filename,
WorkspaceName: ws.Name,
Approved: &[]bool{true}[0],
},
}

Expand All @@ -134,6 +135,7 @@ func convertKnowledgeFile(file v1.KnowledgeFile, ws v1.Workspace) types.Knowledg
RemoteKnowledgeSourceID: file.Spec.RemoteKnowledgeSourceName,
RemoteKnowledgeSourceType: file.Spec.RemoteKnowledgeSourceType,
UploadID: file.Status.UploadID,
Approved: file.Spec.Approved,
}
}

Expand Down
1 change: 1 addition & 0 deletions pkg/api/router/router.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ func Router(services *services.Services) (http.Handler, error) {
// Agent knowledge files
mux.HandleFunc("GET /api/agents/{id}/knowledge", agents.Knowledge)
mux.HandleFunc("POST /api/agents/{id}/knowledge/{file}", agents.UploadKnowledge)
mux.HandleFunc("PUT /api/agents/{id}/knowledge/{file_id}/approve", agents.ApproveKnowledgeFile)
mux.HandleFunc("DELETE /api/agents/{id}/knowledge/{file...}", agents.DeleteKnowledge)

mux.HandleFunc("POST /api/agents/{agent_id}/remote-knowledge-sources", agents.CreateRemoteKnowledgeSource)
Expand Down
96 changes: 65 additions & 31 deletions pkg/controller/handlers/knowledge/knowledge.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"fmt"
"io"
"maps"
"path/filepath"
"sort"
"strings"
"time"
Expand All @@ -21,6 +22,7 @@ import (
"github.com/otto8-ai/otto8/pkg/events"
"github.com/otto8-ai/otto8/pkg/knowledge"
v1 "github.com/otto8-ai/otto8/pkg/storage/apis/otto.gptscript.ai/v1"
"github.com/otto8-ai/otto8/pkg/workspace"
"k8s.io/apimachinery/pkg/api/equality"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -70,28 +72,6 @@ func (a *Handler) DeleteKnowledge(req router.Request, _ router.Response) error {
return nil
}

// isIngestionBlocked reports whether any remote knowledge source attached to
// the workspace's knowledge set has DisableIngestionAfterSync set in its
// manifest.
//
// It fetches the knowledge set named by ws.Spec.KnowledgeSetName, lists the
// remote knowledge sources in the workspace's namespace whose
// "spec.knowledgeSetName" field matches that set, and returns true on the
// first source with the flag set. Lookup or list errors are returned together
// with false.
func (a *Handler) isIngestionBlocked(ctx context.Context, c kclient.Client, ws *v1.Workspace) (bool, error) {
	var knowledgeSet v1.KnowledgeSet
	setKey := router.Key(ws.Namespace, ws.Spec.KnowledgeSetName)
	if err := c.Get(ctx, setKey, &knowledgeSet); err != nil {
		return false, err
	}

	var sources v1.RemoteKnowledgeSourceList
	err := c.List(ctx, &sources,
		kclient.InNamespace(ws.Namespace),
		kclient.MatchingFields{"spec.knowledgeSetName": knowledgeSet.Name},
	)
	if err != nil {
		return false, err
	}

	for idx := range sources.Items {
		if sources.Items[idx].Spec.Manifest.DisableIngestionAfterSync {
			return true, nil
		}
	}
	return false, nil
}

func (a *Handler) IngestKnowledge(req router.Request, resp router.Response) error {
ws := req.Object.(*v1.Workspace)
if !ws.Spec.IsKnowledge || ws.Spec.KnowledgeSetName == "" {
Expand All @@ -103,10 +83,6 @@ func (a *Handler) IngestKnowledge(req router.Request, resp router.Response) erro
return nil
}

if blocked, err := a.isIngestionBlocked(req.Ctx, req.Client, ws); blocked || err != nil {
return err
}

if !ws.Status.IngestionLastRunTime.IsZero() && ws.Status.IngestionLastRunTime.Add(30*time.Second).After(time.Now()) {
resp.RetryAfter(10 * time.Second)
return nil
Expand All @@ -119,21 +95,40 @@ func (a *Handler) IngestKnowledge(req router.Request, resp router.Response) erro
return err
}

if len(files.Items) == 0 {
var approvedFiles v1.KnowledgeFileList
for _, file := range files.Items {
if file.Spec.Approved != nil && *file.Spec.Approved {
approvedFiles.Items = append(approvedFiles.Items, file)
}
}

if len(approvedFiles.Items) == 0 {
return nil
}

// Sleep for 5 seconds before listing the files again. When several files are approved at about the same time, the first
// invocation may see only part of the approved list; later runs would pick up the rest, but the first ingestion would be
// incomplete. Waiting 5 seconds gives the concurrent approvals time to land before the uncached list below.
time.Sleep(5 * time.Second)
if err := req.Client.List(req.Ctx, uncached.List(&files), kclient.InNamespace(ws.Namespace), kclient.MatchingFields{
"spec.workspaceName": ws.Name,
}); err != nil {
return err
}

sort.Slice(files.Items, func(i, j int) bool {
return files.Items[i].UID < files.Items[j].UID
})

digest := sha256.New()

for _, file := range files.Items {
digest.Write([]byte(file.Name))
digest.Write([]byte{0})
digest.Write([]byte(file.Status.FileDetails.UpdatedAt))
digest.Write([]byte{0})
if file.Spec.Approved != nil && *file.Spec.Approved {
digest.Write([]byte(file.Name))
digest.Write([]byte{0})
digest.Write([]byte(file.Status.FileDetails.UpdatedAt))
digest.Write([]byte{0})
}
}
var syncNeeded bool

Expand Down Expand Up @@ -161,6 +156,32 @@ func (a *Handler) IngestKnowledge(req router.Request, resp router.Response) erro
}

if syncNeeded {
var ignoreFileContent string
for _, file := range files.Items {
if file.Spec.Approved == nil || !*file.Spec.Approved {
if file.Status.FileDetails.FilePath != "" {
rel, err := filepath.Rel(workspace.GetDir(ws.Status.WorkspaceID), file.Status.FileDetails.FilePath)
if err != nil {
return fmt.Errorf("failed to get relative path for file: %w", err)
}
ignoreFileContent += fmt.Sprintf("%s\n", rel)
} else {
ignoreFileContent += fmt.Sprintf("%s\n", file.Spec.FileName)
}
}
}

if ignoreFileContent != "" {
err := a.gptscript.WriteFileInWorkspace(req.Ctx, ws.Status.WorkspaceID, ".knowignore", []byte(ignoreFileContent))
if err != nil {
return fmt.Errorf("failed to create knowledge metadata file: %w", err)
}
} else {
if err := a.gptscript.DeleteFileInWorkspace(req.Ctx, ws.Status.WorkspaceID, ".knowignore"); err != nil {
return fmt.Errorf("failed to delete ignore file: %w", err)
}
}

run, err := a.ingester.IngestKnowledge(req.Ctx, ws.GetNamespace(), ws.Spec.KnowledgeSetName, ws.Status.WorkspaceID)
if err != nil {
return err
Expand Down Expand Up @@ -282,6 +303,15 @@ func compileFileStatuses(ctx context.Context, client kclient.Client, ws *v1.Work
return final, errors.Join(errs...)
}

// BindWorkspace fetches the workspace that a knowledge file names in
// Spec.WorkspaceName, from the file's own namespace, and returns any error
// from that lookup. The fetched workspace itself is discarded.
//
// Per the original author's note, the lookup exists to force an association
// between the file and its workspace so that the workspace is re-enqueued
// when the file changes.
func (a *Handler) BindWorkspace(req router.Request, resp router.Response) error {
	kFile := req.Object.(*v1.KnowledgeFile)

	var ws v1.Workspace
	return req.Get(&ws, kFile.Namespace, kFile.Spec.WorkspaceName)
}

func (a *Handler) CleanupFile(req router.Request, resp router.Response) error {
kFile := req.Object.(*v1.KnowledgeFile)

Expand All @@ -297,5 +327,9 @@ func (a *Handler) CleanupFile(req router.Request, resp router.Response) error {
return err
}

if _, err := a.ingester.DeleteKnowledgeFiles(req.Ctx, kFile.Namespace, filepath.Join(workspace.GetDir(ws.Status.WorkspaceID), kFile.Spec.FileName)); err != nil {
return err
}

return nil
}
14 changes: 10 additions & 4 deletions pkg/controller/handlers/uploads/remoteknowledgesource.go
Original file line number Diff line number Diff line change
Expand Up @@ -220,12 +220,12 @@ func (u *UploadHandler) HandleUploadRun(req router.Request, resp router.Response
return err
}

if err := u.writeMetadataForKnowledge(req.Ctx, metadata.Output.Files, ws, remoteKnowledgeSource); err != nil {
knowledgeFileNamesFromOutput, err := compileKnowledgeFiles(req.Ctx, req.Client, remoteKnowledgeSource, metadata.Output.Files, ws)
if err != nil {
return err
}

knowledgeFileNamesFromOutput, err := compileKnowledgeFilesFromOneDriveConnector(req.Ctx, req.Client, remoteKnowledgeSource, metadata.Output.Files, ws)
if err != nil {
if err := u.writeMetadataForKnowledge(req.Ctx, metadata.Output.Files, ws, remoteKnowledgeSource); err != nil {
return err
}

Expand Down Expand Up @@ -313,7 +313,7 @@ func createFileMetadata(files map[string]types.FileDetails, ws v1.Workspace) map
return fileMetadata
}

func compileKnowledgeFilesFromOneDriveConnector(ctx context.Context, c client.Client,
func compileKnowledgeFiles(ctx context.Context, c client.Client,
remoteKnowledgeSource *v1.RemoteKnowledgeSource, files map[string]types.FileDetails,
ws *v1.Workspace) (map[string]struct{}, error) {
var (
Expand All @@ -322,6 +322,7 @@ func compileKnowledgeFilesFromOneDriveConnector(ctx context.Context, c client.Cl
outputDir = workspace.GetDir(ws.Status.WorkspaceID)
knowledgeFileNamesFromOutput = make(map[string]struct{}, len(files))
)

for id, v := range files {
fileRelPath, err := filepath.Rel(outputDir, v.FilePath)
if err != nil {
Expand All @@ -342,12 +343,16 @@ func compileKnowledgeFilesFromOneDriveConnector(ctx context.Context, c client.Cl
RemoteKnowledgeSourceType: remoteKnowledgeSource.Spec.Manifest.SourceType,
},
}
if remoteKnowledgeSource.Spec.Manifest.AutoApprove != nil && *remoteKnowledgeSource.Spec.Manifest.AutoApprove {
newKnowledgeFile.Spec.Approved = &[]bool{true}[0]
}
if err := c.Create(ctx, newKnowledgeFile); err == nil || apierrors.IsAlreadyExists(err) {
// If the file was created or already existed, ensure it has the latest details from the metadata.
if err = retry.RetryOnConflict(retry.DefaultBackoff, func() error {
if err := c.Get(ctx, router.Key(newKnowledgeFile.Namespace, newKnowledgeFile.Name), uncached.Get(newKnowledgeFile)); err != nil {
return err
}
v.Ingested = newKnowledgeFile.Status.IngestionStatus.Status == "finished" || newKnowledgeFile.Status.IngestionStatus.Status == "skipped"
if newKnowledgeFile.Status.UploadID == id && newKnowledgeFile.Status.FileDetails == v {
// The file has the correct details, no need to update.
return nil
Expand All @@ -362,6 +367,7 @@ func compileKnowledgeFilesFromOneDriveConnector(ctx context.Context, c client.Cl
} else if err != nil {
errs = append(errs, err)
}

}

if len(errs) > 0 {
Expand Down
1 change: 1 addition & 0 deletions pkg/controller/routes.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ func (c *Controller) setupRoutes() error {

// Knowledge files
root.Type(&v1.KnowledgeFile{}).HandlerFunc(cleanup.Cleanup)
root.Type(&v1.KnowledgeFile{}).HandlerFunc(knowledge.BindWorkspace)
root.Type(&v1.KnowledgeFile{}).FinalizeFunc(v1.KnowledgeFileFinalizer, knowledge.CleanupFile)

// Workspaces
Expand Down
11 changes: 11 additions & 0 deletions pkg/knowledge/knowledge.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,17 @@ func (i *Ingester) IngestKnowledge(ctx context.Context, namespace, knowledgeSetN
)
}

// DeleteKnowledgeFiles invokes the system.KnowledgeDeleteFileTool system action
// in the given namespace, passing knowledgeFilePath as its input, and returns
// the invoker's response and error unchanged.
func (i *Ingester) DeleteKnowledgeFiles(ctx context.Context, namespace, knowledgeFilePath string) (*invoke.Response, error) {
	return i.invoker.SystemAction(ctx, "ingest-delete-file-", namespace, system.KnowledgeDeleteFileTool, knowledgeFilePath)
}

func (i *Ingester) DeleteKnowledge(ctx context.Context, namespace, knowledgeSetName string) (*invoke.Response, error) {
return i.invoker.SystemAction(
ctx,
Expand Down
1 change: 1 addition & 0 deletions pkg/storage/apis/otto.gptscript.ai/v1/file.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ type KnowledgeFileSpec struct {
WorkspaceName string `json:"workspaceName,omitempty"`
RemoteKnowledgeSourceName string `json:"remoteKnowledgeSourceName,omitempty"`
RemoteKnowledgeSourceType types.RemoteKnowledgeSourceType `json:"remoteKnowledgeSourceType,omitempty"`
Approved *bool `json:"approved,omitempty"`
}

type KnowledgeFileStatus struct {
Expand Down
Loading

0 comments on commit 556f8b0

Please sign in to comment.