Skip to content

Commit

Permalink
chore: implement jsondb
Browse files (browse the repository at this point in the history)
  • Loading branch information
henomis committed Oct 14, 2023
1 parent 1e42468 commit 6362123
Show file tree
Hide file tree
Showing 8 changed files with 246 additions and 326 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import (
openaiembedder "github.com/henomis/lingoose/embedder/openai"
"github.com/henomis/lingoose/index"
indexoption "github.com/henomis/lingoose/index/option"
simplevectorindex "github.com/henomis/lingoose/index/simpleVectorIndex"
"github.com/henomis/lingoose/index/vectordb/jsondb"
"github.com/henomis/lingoose/llm/openai"
"github.com/henomis/lingoose/loader"
"github.com/henomis/lingoose/prompt"
Expand All @@ -18,20 +18,22 @@ import (

func main() {

openaiEmbedder := openaiembedder.New(openaiembedder.AdaEmbeddingV2)
index := index.New(
jsondb.New("db.json"),
openaiembedder.New(openaiembedder.AdaEmbeddingV2),
).WithIncludeContents(true)

docsVectorIndex := simplevectorindex.New("docs", ".", openaiEmbedder)
indexIsEmpty, _ := docsVectorIndex.IsEmpty()
indexIsEmpty, _ := index.IsEmpty(context.Background())

if indexIsEmpty {
err := ingestData(docsVectorIndex, openaiEmbedder)
err := ingestData(index)
if err != nil {
panic(err)
}
}

query := "What is the purpose of the NATO Alliance?"
similarities, err := docsVectorIndex.Query(
similarities, err := index.Query(
context.Background(),
query,
indexoption.WithTopK(3),
Expand All @@ -52,7 +54,7 @@ func main() {
documentContext += similarity.Content() + "\n\n"
}

llmOpenAI := openai.NewCompletion()
llmOpenAI := openai.NewCompletion().WithVerbose(true)
prompt1 := prompt.NewPromptTemplate(
"Based on the following context answer to the question.\n\nContext:\n{{.context}}\n\nQuestion: {{.query}}").WithInputs(
map[string]string{
Expand All @@ -74,7 +76,7 @@ func main() {
fmt.Println(output)
}

func ingestData(docsVectorIndex *simplevectorindex.Index, openaiEmbedder index.Embedder) error {
func ingestData(index *index.Index) error {

fmt.Printf("Ingesting data...")

Expand All @@ -87,7 +89,7 @@ func ingestData(docsVectorIndex *simplevectorindex.Index, openaiEmbedder index.E

documentChunks := textSplitter.SplitDocuments(documents)

err = docsVectorIndex.LoadFromDocuments(context.Background(), documentChunks)
err = index.LoadFromDocuments(context.Background(), documentChunks)
if err != nil {
return err
}
Expand Down
22 changes: 13 additions & 9 deletions examples/embeddings/knowledge_base/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,10 @@ import (

"github.com/henomis/lingoose/chat"
openaiembedder "github.com/henomis/lingoose/embedder/openai"
"github.com/henomis/lingoose/index"
indexoption "github.com/henomis/lingoose/index/option"
simplevectorindex "github.com/henomis/lingoose/index/simpleVectorIndex"

"github.com/henomis/lingoose/index/vectordb/jsondb"
"github.com/henomis/lingoose/llm/openai"
"github.com/henomis/lingoose/loader"
"github.com/henomis/lingoose/prompt"
Expand All @@ -23,13 +25,15 @@ const (

func main() {

openaiEmbedder := openaiembedder.New(openaiembedder.AdaEmbeddingV2)
index := index.New(
jsondb.New("db.json"),
openaiembedder.New(openaiembedder.AdaEmbeddingV2),
).WithIncludeContents(true)

docsVectorIndex := simplevectorindex.New("db", ".", openaiEmbedder)
indexIsEmpty, _ := docsVectorIndex.IsEmpty()
indexIsEmpty, _ := index.IsEmpty(context.Background())

if indexIsEmpty {
err := ingestData(docsVectorIndex)
err := ingestData(index)
if err != nil {
panic(err)
}
Expand All @@ -49,7 +53,7 @@ func main() {
break
}

similarities, err := docsVectorIndex.Query(context.Background(), query, indexoption.WithTopK(3))
similarities, err := index.Query(context.Background(), query, indexoption.WithTopK(3))
if err != nil {
panic(err)
}
Expand Down Expand Up @@ -98,11 +102,11 @@ func main() {

}

func ingestData(docsVectorIndex *simplevectorindex.Index) error {
func ingestData(index *index.Index) error {

fmt.Printf("Learning Knowledge Base...")

loader := loader.NewPDFToTextLoader("./kb")
loader := loader.NewPDFToTextLoader("./kb").WithPDFToTextPath("/opt/homebrew/bin/pdftotext")

documents, err := loader.Load(context.Background())
if err != nil {
Expand All @@ -113,7 +117,7 @@ func ingestData(docsVectorIndex *simplevectorindex.Index) error {

documentChunks := textSplitter.SplitDocuments(documents)

err = docsVectorIndex.LoadFromDocuments(context.Background(), documentChunks)
err = index.LoadFromDocuments(context.Background(), documentChunks)
if err != nil {
return err
}
Expand Down
5 changes: 3 additions & 2 deletions examples/embeddings/simplekb/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@ import (
"context"

openaiembedder "github.com/henomis/lingoose/embedder/openai"
"github.com/henomis/lingoose/index"
"github.com/henomis/lingoose/index/option"
simplevectorindex "github.com/henomis/lingoose/index/simpleVectorIndex"
"github.com/henomis/lingoose/index/vectordb/jsondb"
"github.com/henomis/lingoose/llm/openai"
"github.com/henomis/lingoose/loader"
qapipeline "github.com/henomis/lingoose/pipeline/qa"
Expand All @@ -14,7 +15,7 @@ import (

func main() {
docs, _ := loader.NewPDFToTextLoader("./kb").WithTextSplitter(textsplitter.NewRecursiveCharacterTextSplitter(2000, 200)).Load(context.Background())
index := simplevectorindex.New("db", ".", openaiembedder.New(openaiembedder.AdaEmbeddingV2))
index := index.New(jsondb.New("db.json"), openaiembedder.New(openaiembedder.AdaEmbeddingV2)).WithIncludeContents(true)
index.LoadFromDocuments(context.Background(), docs)
qapipeline.New(openai.NewChat().WithVerbose(true)).WithIndex(index).Query(context.Background(), "What is the NATO purpose?", option.WithTopK(1))
}
8 changes: 6 additions & 2 deletions examples/llm/cache/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,19 @@ import (
"strings"

openaiembedder "github.com/henomis/lingoose/embedder/openai"
simplevectorindex "github.com/henomis/lingoose/index/simpleVectorIndex"
"github.com/henomis/lingoose/index"
"github.com/henomis/lingoose/index/vectordb/jsondb"
"github.com/henomis/lingoose/llm/cache"
"github.com/henomis/lingoose/llm/openai"
)

func main() {

embedder := openaiembedder.New(openaiembedder.AdaEmbeddingV2)
index := simplevectorindex.New("db", ".", embedder)
index := index.New(
jsondb.New("db.json"),
embedder,
)
llm := openai.NewCompletion().WithCompletionCache(cache.New(embedder, index).WithTopK(3))

for {
Expand Down
9 changes: 9 additions & 0 deletions index/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ const (
DefaultKeyContent = "content"
defaultBatchInsertSize = 32
defaultTopK = 10
defaultIncludeContent = true
)

type Data struct {
Expand Down Expand Up @@ -51,6 +52,7 @@ func New(vectorDB VectorDB, embedder Embedder) *Index {
vectorDB: vectorDB,
embedder: embedder,
batchInsertSize: defaultBatchInsertSize,
includeContent: defaultIncludeContent,
}
}

Expand All @@ -72,6 +74,13 @@ func (i *Index) LoadFromDocuments(ctx context.Context, documents []document.Docu
return nil
}

// Add stores a single Data record in the underlying vector database.
//
// The record is handed to vectorDB.Insert as-is, wrapped in a one-element
// slice; this method does not invoke the embedder. A nil data pointer is
// treated as a no-op and yields a nil error.
func (i *Index) Add(ctx context.Context, data *Data) error {
	if data != nil {
		// Dereference so the vector DB receives a value copy of the record.
		records := []Data{*data}
		return i.vectorDB.Insert(ctx, records)
	}
	return nil
}

// IsEmpty reports whether the index holds no data by delegating to the
// underlying vector database's IsEmpty; both the result and any error are
// returned unchanged.
func (i *Index) IsEmpty(ctx context.Context) (bool, error) {
	empty, err := i.vectorDB.IsEmpty(ctx)
	return empty, err
}
Expand Down
Loading

0 comments on commit 6362123

Please sign in to comment.