Skip to content
This repository has been archived by the owner on Oct 30, 2024. It is now read-only.

add: blueprint flows file #80

Merged
merged 2 commits into from
Aug 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ ifeq ($(GIT_TAG),)
GIT_TAG := $(shell git describe --always)
endif

GO_TAGS := netgo
GO_TAGS := netgo mupdf
LD_FLAGS := -s -w -X github.com/gptscript-ai/knowledge/version.Version=${GIT_TAG}
build:
go build -mod=mod -o bin/knowledge -tags "${GO_TAGS}" -ldflags '$(LD_FLAGS) ' .
Expand Down
2 changes: 1 addition & 1 deletion pkg/cmd/askdir.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ func (s *ClientAskDir) Run(cmd *cobra.Command, args []string) error {
datasetID := client.HashPath(abspath)

slog.Debug("Loading ingestion flows from config", "flows_file", s.FlowsFile, "dataset", datasetID)
flowCfg, err := flowconfig.FromFile(s.FlowsFile)
flowCfg, err := flowconfig.Load(s.FlowsFile)
if err != nil {
return err
}
Expand Down
9 changes: 5 additions & 4 deletions pkg/cmd/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,15 @@ package cmd
import (
"archive/zip"
"fmt"
"io"
"os"
"path/filepath"

"github.com/gptscript-ai/knowledge/pkg/client"
"github.com/gptscript-ai/knowledge/pkg/config"
"github.com/gptscript-ai/knowledge/pkg/datastore"
"github.com/gptscript-ai/knowledge/pkg/datastore/embeddings"
"github.com/gptscript-ai/knowledge/pkg/datastore/types"
"io"
"os"
"path/filepath"
)

type Client struct {
Expand All @@ -25,7 +26,7 @@ type Client struct {
}

type ClientFlowsConfig struct {
FlowsFile string `usage:"Path to a YAML/JSON file containing ingestion/retrieval flows" env:"KNOW_FLOWS_FILE"`
FlowsFile string `usage:"Path to a YAML/JSON file containing ingestion/retrieval flows" env:"KNOW_FLOWS_FILE" default:""`
Flow string `usage:"Flow name" env:"KNOW_FLOW"`
}

Expand Down
8 changes: 5 additions & 3 deletions pkg/cmd/ingest.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@ package cmd

import (
"fmt"
"github.com/acorn-io/z"
"github.com/spf13/cobra"
"log/slog"
"strings"

"github.com/acorn-io/z"
"github.com/spf13/cobra"

"github.com/gptscript-ai/knowledge/pkg/client"
"github.com/gptscript-ai/knowledge/pkg/datastore/textsplitter"
flowconfig "github.com/gptscript-ai/knowledge/pkg/flows/config"
Expand Down Expand Up @@ -68,7 +69,8 @@ func (s *ClientIngest) Run(cmd *cobra.Command, args []string) error {

if s.FlowsFile != "" {
slog.Debug("Loading ingestion flows from config", "flows_file", s.FlowsFile, "dataset", datasetID)
flowCfg, err := flowconfig.FromFile(s.FlowsFile)

flowCfg, err := flowconfig.Load(s.FlowsFile)
if err != nil {
return err
}
Expand Down
5 changes: 3 additions & 2 deletions pkg/cmd/retrieve.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@ import (
"encoding/json"
"errors"
"fmt"
"log/slog"

"github.com/gptscript-ai/knowledge/pkg/datastore"
flowconfig "github.com/gptscript-ai/knowledge/pkg/flows/config"
vserr "github.com/gptscript-ai/knowledge/pkg/vectorstore/errors"
"github.com/spf13/cobra"
"log/slog"
)

type ClientRetrieve struct {
Expand Down Expand Up @@ -49,7 +50,7 @@ func (s *ClientRetrieve) Run(cmd *cobra.Command, args []string) error {

if s.FlowsFile != "" {
slog.Debug("Loading retrieval flows from config", "flows_file", s.FlowsFile, "dataset", datasetIDs)
flowCfg, err := flowconfig.FromFile(s.FlowsFile)
flowCfg, err := flowconfig.Load(s.FlowsFile)
if err != nil {
return err
}
Expand Down
21 changes: 21 additions & 0 deletions pkg/flows/config/blueprints.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package config

import (
_ "embed"
"fmt"
)

// BlueprintDefault holds the raw contents of blueprints/default.yaml, which
// the compiler embeds into the binary via the directive below.
//
//go:embed blueprints/default.yaml
var BlueprintDefault []byte

// Blueprints maps a blueprint name to its raw configuration bytes.
// GetBlueprint resolves names against this map.
var Blueprints = map[string][]byte{
"default": BlueprintDefault,
}

// GetBlueprint returns the raw contents of the blueprint registered under
// name in Blueprints. If no entry exists for name, it returns a nil slice and
// an error that quotes the requested name.
func GetBlueprint(name string) ([]byte, error) {
	if content, found := Blueprints[name]; found {
		return content, nil
	}
	return nil, fmt.Errorf("blueprint %q not found", name)
}
42 changes: 42 additions & 0 deletions pkg/flows/config/blueprints/default.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Default flow blueprint. This file is embedded into the binary as
# BlueprintDefault and registered under the blueprint name "default".
# Environment variable references (such as the OpenAI API key placeholders
# below) are expanded by FromBytes via os.ExpandEnv before the document is
# parsed.
flows:
knowledge:
default: true
globals:
ingestion:
textsplitter:
chunkSize: 800
chunkOverlap: 400
ingestion:
- filetypes: ["*"]
retrieval:
querymodifiers:
# Enhance
- name: enhance
options:
model:
openai:
apiKey: "${OPENAI_API_KEY}"
model: gpt-4o
apiType: OPEN_AI
baseURL: https://api.openai.com/v1
retriever:
name: subquery
options:
limit: 3 # max. 3 subqueries
topK: 10 # topK per search
model:
openai:
apiKey: "${OPENAI_API_KEY}"
model: gpt-4o
apiType: OPEN_AI
baseURL: https://api.openai.com/v1
postprocessors:
- name: similarity
options:
threshold: 0.6
- name: reduce
options:
topK: 10



23 changes: 22 additions & 1 deletion pkg/flows/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,12 @@ package config
import (
"encoding/json"
"fmt"
"github.com/gptscript-ai/knowledge/pkg/output"
"log/slog"
"os"
"strings"

"github.com/gptscript-ai/knowledge/pkg/output"

"github.com/gptscript-ai/knowledge/pkg/datastore/documentloader"
"github.com/gptscript-ai/knowledge/pkg/datastore/postprocessors"
"github.com/gptscript-ai/knowledge/pkg/datastore/querymodifiers"
Expand Down Expand Up @@ -82,16 +83,36 @@ type TransformerConfig struct {
GenericBaseConfig
}

// FromBlueprint looks up the named blueprint with GetBlueprint and parses its
// contents with FromBytes. A failed lookup is returned to the caller
// unchanged, together with a nil config.
func FromBlueprint(name string) (*FlowConfig, error) {
	raw, lookupErr := GetBlueprint(name)
	if lookupErr != nil {
		return nil, lookupErr
	}
	return FromBytes(raw)
}

// Load resolves a flow configuration reference. A reference starting with
// "blueprint:" is handed to FromBlueprint with that prefix removed; any other
// reference is treated as a file path and handed to FromFile.
func Load(reference string) (*FlowConfig, error) {
	const blueprintPrefix = "blueprint:"

	if !strings.HasPrefix(reference, blueprintPrefix) {
		return FromFile(reference)
	}
	// The prefix is known to be present, so slicing it off is equivalent to
	// trimming it exactly once.
	return FromBlueprint(reference[len(blueprintPrefix):])
}

// FromFile loads the flow configuration stored at filename. The file is read
// in full with os.ReadFile and its contents are parsed by FromBytes; a read
// failure is returned as-is with a nil config.
func FromFile(filename string) (*FlowConfig, error) {
	data, readErr := os.ReadFile(filename)
	if readErr != nil {
		return nil, readErr
	}
	return FromBytes(data)
}

func FromBytes(content []byte) (*FlowConfig, error) {
// Expand environment variables in config
content = []byte(os.ExpandEnv(string(content)))

var err error

var config FlowConfig
jsondata := content
if !json.Valid(content) {
Expand Down