Skip to content
This repository has been archived by the owner on Oct 30, 2024. It is now read-only.

Commit

Permalink
add: sqlite-vec vectorDB implementation (#150)
Browse files: browse the repository at this point in the history
  • Loading branch information
iwilltry42 authored Oct 21, 2024
1 parent da40858 commit 2d5d727
Show file tree
Hide file tree
Showing 6 changed files with 395 additions and 2 deletions.
4 changes: 4 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ require (
github.com/acorn-io/cmd v0.0.0-20240625164600-6c594fbd857e
github.com/acorn-io/z v0.0.0-20231104012607-4cab1b3ec5e5
github.com/adrg/xdg v0.4.0
github.com/asg017/sqlite-vec-go-bindings v0.1.4-alpha.2
github.com/cohere-ai/cohere-go/v2 v2.8.2
github.com/gabriel-vasile/mimetype v1.4.4
github.com/gen2brain/go-fitz v1.23.7
Expand All @@ -39,6 +40,7 @@ require (
github.com/ledongthuc/pdf v0.0.0-20240201131950-da5b75280b06
github.com/lu4p/cat v0.1.5
github.com/mitchellh/mapstructure v1.5.0
github.com/ncruces/go-sqlite3 v0.19.0
github.com/pgvector/pgvector-go v0.2.2
github.com/philippgille/chromem-go v0.6.1-0.20240811154507-a1944285b284
github.com/spf13/cobra v1.8.1
Expand Down Expand Up @@ -135,6 +137,7 @@ require (
github.com/mitchellh/reflectwalk v1.0.2 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/ncruces/julianday v1.0.0 // indirect
github.com/olekukonko/tablewriter v0.0.5 // indirect
github.com/otiai10/gosseract/v2 v2.2.4 // indirect
github.com/pelletier/go-toml/v2 v2.2.2 // indirect
Expand All @@ -154,6 +157,7 @@ require (
github.com/skeema/knownhosts v1.2.2 // indirect
github.com/spf13/pflag v1.0.5 // indirect
github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf // indirect
github.com/tetratelabs/wazero v1.8.1 // indirect
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
github.com/ugorji/go/codec v1.2.12 // indirect
github.com/unidoc/unioffice v1.33.0 // indirect
Expand Down
8 changes: 8 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ github.com/araddon/dateparse v0.0.0-20200409225146-d820a6159ab1 h1:TEBmxO80TM04L
github.com/araddon/dateparse v0.0.0-20200409225146-d820a6159ab1/go.mod h1:SLqhdZcd+dF3TEVL2RMoob5bBP5R1P1qkox+HtCBgGI=
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio=
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs=
github.com/asg017/sqlite-vec-go-bindings v0.1.4-alpha.2 h1:FptYOWqJ+7rL5AXFt/AMS4eRtLF/uSQm6/x6vZPs5RU=
github.com/asg017/sqlite-vec-go-bindings v0.1.4-alpha.2/go.mod h1:A8+cTt/nKFsYCQF6OgzSNpKZrzNo5gQsXBTfsXHXY0Q=
github.com/avast/retry-go v3.0.0+incompatible h1:4SOWQ7Qs+oroOTQOYnAHqelpCO0biHSxpiH9JdtuBj0=
github.com/avast/retry-go v3.0.0+incompatible/go.mod h1:XtSnn+n/sHqQIpZ10K1qAevBhOOCWBLXXy3hyiqqBrY=
github.com/aws/aws-sdk-go-v2 v1.27.2 h1:pLsTXqX93rimAOZG2FIYraDQstZaaGVVN4tNw65v0h8=
Expand Down Expand Up @@ -323,6 +325,10 @@ github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJ
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
github.com/ncruces/go-sqlite3 v0.19.0 h1:yebbD/cP8Gf+7nKoUin2ATjnqJK2VvyS30d3xsjRp5k=
github.com/ncruces/go-sqlite3 v0.19.0/go.mod h1:yL4ZNWGsr1/8pcLfpPW1RT1WFdvyeHonrgIwwi4rvkg=
github.com/ncruces/julianday v1.0.0 h1:fH0OKwa7NWvniGQtxdJRxAgkBMolni2BjDHaWTxqt7M=
github.com/ncruces/julianday v1.0.0/go.mod h1:Dusn2KvZrrovOMJuOt0TNXL6tB7U2E8kvza5fFc9G7g=
github.com/olekukonko/tablewriter v0.0.0-20180506121414-d4647c9c7a84/go.mod h1:vsDQFd/mU46D+Z4whnwzcISnGGzXWMclvtLoiIKAKIo=
github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec=
github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY=
Expand Down Expand Up @@ -402,6 +408,8 @@ github.com/swaggo/gin-swagger v1.6.0 h1:y8sxvQ3E20/RCyrXeFfg60r6H0Z+SwpTjMYsMm+z
github.com/swaggo/gin-swagger v1.6.0/go.mod h1:BG00cCEy294xtVpyIAHG6+e2Qzj/xKlRdOqDkvq0uzo=
github.com/swaggo/swag v1.16.3 h1:PnCYjPCah8FK4I26l2F/KQ4yz3sILcVUN3cTlBFA9Pg=
github.com/swaggo/swag v1.16.3/go.mod h1:DImHIuOFXKpMFAQjcC7FG4m3Dg4+QuUgUzJmKjI/gRk=
github.com/tetratelabs/wazero v1.8.1 h1:NrcgVbWfkWvVc4UtT4LRLDf91PsOzDzefMdwhLfA550=
github.com/tetratelabs/wazero v1.8.1/go.mod h1:yAI0XTsMBhREkM/YDAK/zNou3GoiAce1P6+rp/wQhjs=
github.com/tmthrgd/go-hex v0.0.0-20190904060850-447a3041c3bc h1:9lRDQMhESg+zvGYmW5DyG0UqvY96Bu5QYsTLvCHdrgo=
github.com/tmthrgd/go-hex v0.0.0-20190904060850-447a3041c3bc/go.mod h1:bciPuU6GHm1iF1pBvUfxfsH0Wmnc2VbpgvbI9ZWuIRs=
github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI=
Expand Down
7 changes: 7 additions & 0 deletions pkg/flows/flows.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"slices"

"github.com/acorn-io/z"
"github.com/google/uuid"
"github.com/gptscript-ai/knowledge/pkg/datastore/store"
"github.com/gptscript-ai/knowledge/pkg/log"
vs "github.com/gptscript-ai/knowledge/pkg/vectorstore/types"
Expand Down Expand Up @@ -143,6 +144,12 @@ func (f *IngestionFlow) Run(ctx context.Context, reader io.Reader) ([]vs.Documen
}
transformerLog.With("status", "completed").Info("Transformed documents", "new_num_documents", len(docs))

for i, doc := range docs {
if doc.ID == "" {
docs[i].ID = uuid.NewString()
}
}

return docs, nil
}

Expand Down
3 changes: 1 addition & 2 deletions pkg/vectorstore/chromem/chromem.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ import (
"strconv"
"strings"

"github.com/google/uuid"
"github.com/gptscript-ai/knowledge/pkg/datastore/types"
"github.com/gptscript-ai/knowledge/pkg/env"
"github.com/gptscript-ai/knowledge/pkg/log"
Expand Down Expand Up @@ -80,7 +79,7 @@ func (s *ChromemStore) AddDocuments(ctx context.Context, docs []vs.Document, col
ids := make([]string, len(docs))
chromemDocs := make([]chromem.Document, len(docs))
for docIdx, doc := range docs {
ids[docIdx] = uuid.NewString()
ids[docIdx] = doc.ID
mc := make(map[string]any)
maps.Copy(mc, doc.Metadata)
if len(doc.Content) == 0 {
Expand Down
Loading

0 comments on commit 2d5d727

Please sign in to comment.