From 25ed120e5d68c82b0f8abadc1e19e8299388fdbe Mon Sep 17 00:00:00 2001 From: Simone Vellei Date: Fri, 15 Sep 2023 00:46:50 +0200 Subject: [PATCH 1/9] chore: add linter options --- .golangci.yml | 63 +++++++++++++++++++ chat/chat.go | 8 +-- embedder/cohere/cohere.go | 6 +- embedder/llamacpp/llamacpp.go | 6 +- embedder/openai/openai.go | 19 ++++-- .../llm/huggingface/textgeneration/main.go | 2 +- examples/llm/openai/batch/main.go | 2 +- examples/pipeline/callbacks/main.go | 4 +- examples/pipeline/history/main.go | 2 +- examples/pipeline/memory/main.go | 2 +- examples/pipeline/simple/main.go | 2 +- history/history.go | 22 +++---- index/pinecone/pinecone.go | 11 +++- index/qdrant/qdrant.go | 25 +++++--- index/simpleVectorIndex/simpleVectorIndex.go | 9 +-- llm/cache/cache.go | 6 +- llm/cohere/cohere.go | 10 +-- llm/huggingface/huggingface.go | 20 +++--- llm/huggingface/textgeneration.go | 3 +- llm/llamacpp/llamacpp.go | 10 ++- llm/mock/mock.go | 11 ++-- llm/openai/function.go | 6 +- llm/openai/openai.go | 18 +++--- loader/csv.go | 7 ++- loader/hf_image_to_text.go | 8 +-- loader/hf_speech_recognition.go | 2 +- loader/libreoffice.go | 1 + loader/pdf_to_text.go | 1 + loader/tesseract.go | 1 + loader/text.go | 2 +- loader/whispercpp.go | 22 ++++--- loader/youtube-dl.go | 1 + memory/ram/ram.go | 1 + pipeline/pipeline.go | 14 ++--- pipeline/qa/qa.go | 9 +-- pipeline/sql/mysql.go | 6 +- pipeline/sql/sql.go | 34 +++++----- pipeline/sql/sqlite.go | 1 + pipeline/summarize/summarize.go | 11 ++-- pipeline/tube.go | 52 +++++++-------- prompt/prompt.go | 1 + prompt/template.go | 12 ++-- prompt/whisper.go | 7 ++- textsplitter/recursiveTextSplitter.go | 6 +- textsplitter/textsplitter.go | 32 +++++----- transformer/cohere-rerank.go | 13 ++-- transformer/visual-question-answering.go | 8 +-- 47 files changed, 328 insertions(+), 191 deletions(-) create mode 100644 .golangci.yml diff --git a/.golangci.yml b/.golangci.yml new file mode 100644 index 00000000..6f3b6898 --- /dev/null +++ b/.golangci.yml @@ -0,0 +1,63 @@ +# Copyright 2013-2023 The Cobra Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +run: + deadline: 5m + +linters: + disable-all: true + enable: + #- bodyclose + # - deadcode ! deprecated since v1.49.0; replaced by 'unused' + #- depguard + #- dogsled + #- dupl + - errcheck + - revive + #- exhaustive + #- funlen + - gas + #- gochecknoinits + - goconst + #- gocritic + #- gocyclo + #- gofmt + - goimports + # - golint + #- gomnd + #- goprintffuncname + #- gosec + #- gosimple + - govet + - ineffassign + # - interfacer + - lll + # - maligned + - megacheck + #- misspell + #- nakedret + #- noctx + #- nolintlint + #- rowserrcheck + #- scopelint + #- staticcheck + #- structcheck ! deprecated since v1.49.0; replaced by 'unused' + #- stylecheck + #- typecheck + - unconvert + #- unparam + - unused + # - varcheck ! deprecated since v1.49.0; replaced by 'unused' + #- whitespace + fast: false \ No newline at end of file diff --git a/chat/chat.go b/chat/chat.go index 2f67442b..ff79ad40 100644 --- a/chat/chat.go +++ b/chat/chat.go @@ -63,16 +63,16 @@ func (c *Chat) AddPromptMessages(messages []PromptMessage) { } } -func (p *Chat) addMessagePromptTemplate(message PromptMessage) { - p.promptMessages = append(p.promptMessages, message) +func (c *Chat) addMessagePromptTemplate(message PromptMessage) { + c.promptMessages = append(c.promptMessages, message) } // ToMessages converts the chat prompt template to a list of messages. -func (p *Chat) ToMessages() (Messages, error) { +func (c *Chat) ToMessages() (Messages, error) { var messages Messages var err error - for _, messagePromptTemplate := range p.promptMessages { + for _, messagePromptTemplate := range c.promptMessages { var message Message message.Type = messagePromptTemplate.Type message.Name = messagePromptTemplate.Name diff --git a/embedder/cohere/cohere.go b/embedder/cohere/cohere.go index 6da3aa8b..1e05d28c 100644 --- a/embedder/cohere/cohere.go +++ b/embedder/cohere/cohere.go @@ -52,13 +52,13 @@ func (e *Embedder) WithModel(model EmbedderModel) *Embedder { } // Embed returns the embeddings for the given texts -func (h *Embedder) Embed(ctx context.Context, texts []string) ([]embedder.Embedding, error) { +func (e *Embedder) Embed(ctx context.Context, texts []string) ([]embedder.Embedding, error) { resp := &response.Embed{} - err := h.client.Embed( + err := e.client.Embed( ctx, &request.Embed{ Texts: texts, - Model: &h.model, + Model: &e.model, }, resp, ) diff --git a/embedder/llamacpp/llamacpp.go b/embedder/llamacpp/llamacpp.go index 33598f73..747dffd1 100644 --- a/embedder/llamacpp/llamacpp.go +++ b/embedder/llamacpp/llamacpp.go @@ -43,11 +43,10 @@ func (l *LlamaCppEmbedder) WithArgs(llamacppArgs []string) *LlamaCppEmbedder { } // Embed returns the embeddings for the given texts -func (o *LlamaCppEmbedder) Embed(ctx context.Context, texts []string) ([]embedder.Embedding, error) { - +func (l *LlamaCppEmbedder) Embed(ctx context.Context, texts []string) ([]embedder.Embedding, error) { embeddings := make([]embedder.Embedding, len(texts)) for i, text := range texts { - embedding, err := o.embed(ctx, text) + embedding, err := l.embed(ctx, text) if err != nil { return nil, err } @@ -66,6 +65,7 @@ func (l *LlamaCppEmbedder) embed(ctx context.Context, text string) (embedder.Emb llamacppArgs := []string{"-m", l.modelPath, "-p", text} llamacppArgs = append(llamacppArgs, l.llamacppArgs...) + //nolint:gosec out, err := exec.CommandContext(ctx, l.llamacppPath, llamacppArgs...).Output() if err != nil { return nil, err diff --git a/embedder/openai/openai.go b/embedder/openai/openai.go index 17648c28..431f6b5c 100644 --- a/embedder/openai/openai.go +++ b/embedder/openai/openai.go @@ -89,7 +89,11 @@ func (o *OpenAIEmbedder) Embed(ctx context.Context, texts []string) ([]embedder. return embeddings, nil } -func (o *OpenAIEmbedder) concurrentEmbed(ctx context.Context, texts []string, maxTokens int) ([]embedder.Embedding, error) { +func (o *OpenAIEmbedder) concurrentEmbed( + ctx context.Context, + texts []string, + maxTokens int, +) ([]embedder.Embedding, error) { type indexedEmbeddings struct { index int @@ -115,7 +119,7 @@ func (o *OpenAIEmbedder) concurrentEmbed(ctx context.Context, texts []string, ma } - var err error = nil + var err error for i := 0; i < len(texts); i++ { embedding := <-embeddingsChan if embedding.err != nil { @@ -187,7 +191,10 @@ func (o *OpenAIEmbedder) chunkText(text string, maxTokens int) ([]string, error) return textChunks, nil } -func (o *OpenAIEmbedder) getEmebeddingsForChunks(ctx context.Context, chunks []string) ([]embedder.Embedding, []float64, error) { +func (o *OpenAIEmbedder) getEmebeddingsForChunks( + ctx context.Context, + chunks []string, +) ([]embedder.Embedding, []float64, error) { chunkLens := []float64{} @@ -204,13 +211,13 @@ func (o *OpenAIEmbedder) getEmebeddingsForChunks(ctx context.Context, chunks []s } -func (t *OpenAIEmbedder) openAICreateEmebeddings(ctx context.Context, texts []string) ([]embedder.Embedding, error) { +func (o *OpenAIEmbedder) openAICreateEmebeddings(ctx context.Context, texts []string) ([]embedder.Embedding, error) { - resp, err := t.openAIClient.CreateEmbeddings( + resp, err := o.openAIClient.CreateEmbeddings( ctx, openai.EmbeddingRequest{ Input: texts, - Model: openai.EmbeddingModel(t.model), + Model: openai.EmbeddingModel(o.model), }, ) if err != nil { diff --git a/examples/llm/huggingface/textgeneration/main.go b/examples/llm/huggingface/textgeneration/main.go index a880762d..9727adfe 100644 --- a/examples/llm/huggingface/textgeneration/main.go +++ b/examples/llm/huggingface/textgeneration/main.go @@ -8,7 +8,7 @@ import ( func main() { - llm := huggingface.New("gpt2", 0.1, true).WithMode(huggingface.HuggingFaceModeTextGeneration) + llm := huggingface.New("gpt2", 0.1, true).WithMode(huggingface.ModeTextGeneration) _, err := llm.Completion(context.Background(), "What is the NATO purpose?") if err != nil { diff --git a/examples/llm/openai/batch/main.go b/examples/llm/openai/batch/main.go index 3a4b3edc..8ef3f3e1 100644 --- a/examples/llm/openai/batch/main.go +++ b/examples/llm/openai/batch/main.go @@ -28,7 +28,7 @@ func main() { err = llm.BatchCompletionStream( context.Background(), - []openai.OpenAIStreamCallback{ + []openai.StreamCallback{ func(output string) { fmt.Printf("{%s}", output) output1 += output diff --git a/examples/pipeline/callbacks/main.go b/examples/pipeline/callbacks/main.go index 946eec9b..57c6d7e9 100644 --- a/examples/pipeline/callbacks/main.go +++ b/examples/pipeline/callbacks/main.go @@ -31,13 +31,13 @@ func main() { }, ) - translatePreCallback := pipeline.PipelineCallback(func(ctx context.Context, input types.M) (types.M, error) { + translatePreCallback := pipeline.Callback(func(ctx context.Context, input types.M) (types.M, error) { input["language"] = languages[iterator] input["sentence"] = sentence return input, nil }) - expandPostCallback := pipeline.PipelineCallback(func(ctx context.Context, output types.M) (types.M, error) { + expandPostCallback := pipeline.Callback(func(ctx context.Context, output types.M) (types.M, error) { iterator++ if iterator >= len(languages) { pipeline.SetNextTubeExit(output) diff --git a/examples/pipeline/history/main.go b/examples/pipeline/history/main.go index e8f38b8a..a1438f66 100644 --- a/examples/pipeline/history/main.go +++ b/examples/pipeline/history/main.go @@ -15,7 +15,7 @@ import ( func main() { - history := history.NewHistoryRam() + history := history.NewHistoryRAM() llmChatOpenAI := openai.NewChat() diff --git a/examples/pipeline/memory/main.go b/examples/pipeline/memory/main.go index 9495cbc8..5c02e0e3 100644 --- a/examples/pipeline/memory/main.go +++ b/examples/pipeline/memory/main.go @@ -32,7 +32,7 @@ func main() { }, ) llm2 := pipeline.Llm{ - LlmEngine: &llmmock.JsonLllMock{}, + LlmEngine: &llmmock.JSONLllMock{}, LlmMode: pipeline.LlmModeCompletion, Prompt: prompt2, } diff --git a/examples/pipeline/simple/main.go b/examples/pipeline/simple/main.go index 8c8ea98f..708f199a 100644 --- a/examples/pipeline/simple/main.go +++ b/examples/pipeline/simple/main.go @@ -26,7 +26,7 @@ func main() { }, ) llm2 := pipeline.Llm{ - LlmEngine: &llmmock.JsonLllMock{}, + LlmEngine: &llmmock.JSONLllMock{}, LlmMode: pipeline.LlmModeCompletion, Prompt: prompt2, } diff --git a/history/history.go b/history/history.go index 10dbc616..d29b11c7 100644 --- a/history/history.go +++ b/history/history.go @@ -4,34 +4,34 @@ import ( "github.com/henomis/lingoose/types" ) -type HistoryMessageType string +type MessageType string -type HistoryMessage struct { +type Message struct { Content string `json:"content"` Meta types.Meta `json:"meta"` } // ***** History RAM implementation ***** -type HistoryRam struct { - history []HistoryMessage +type RAM struct { + history []Message } -func NewHistoryRam() *HistoryRam { - return &HistoryRam{} +func NewHistoryRAM() *RAM { + return &RAM{} } -func (h *HistoryRam) Add(content string, meta types.Meta) error { - h.history = append(h.history, HistoryMessage{ +func (h *RAM) Add(content string, meta types.Meta) error { + h.history = append(h.history, Message{ Content: content, Meta: meta, }) return nil } -func (h *HistoryRam) All() []HistoryMessage { +func (h *RAM) All() []Message { return h.history } -func (h *HistoryRam) Clear() { - h.history = []HistoryMessage{} +func (h *RAM) Clear() { + h.history = []Message{} } diff --git a/index/pinecone/pinecone.go b/index/pinecone/pinecone.go index 0896a674..e0754bf4 100644 --- a/index/pinecone/pinecone.go +++ b/index/pinecone/pinecone.go @@ -209,7 +209,11 @@ func (p *Index) query(ctx context.Context, query string, opts *option.Options) ( return p.similaritySearch(ctx, embeddings[0], opts) } -func (p *Index) similaritySearch(ctx context.Context, values []float64, opts *option.Options) ([]pineconeresponse.QueryMatch, error) { +func (p *Index) similaritySearch( + ctx context.Context, + values []float64, + opts *option.Options, +) ([]pineconeresponse.QueryMatch, error) { err := p.getProjectID(ctx) if err != nil { return nil, fmt.Errorf("%s: %w", index.ErrInternal, err) @@ -406,7 +410,10 @@ func buildPineconeVectorsFromEmbeddingsAndDocuments( return vectors, nil } -func buildSearchResultsFromPineconeMatches(matches []pineconeresponse.QueryMatch, includeContent bool) index.SearchResults { +func buildSearchResultsFromPineconeMatches( + matches []pineconeresponse.QueryMatch, + includeContent bool, +) index.SearchResults { searchResults := make([]index.SearchResult, len(matches)) for i, match := range matches { diff --git a/index/qdrant/qdrant.go b/index/qdrant/qdrant.go index 956a12fa..b9244f62 100644 --- a/index/qdrant/qdrant.go +++ b/index/qdrant/qdrant.go @@ -92,18 +92,18 @@ func (q *Index) LoadFromDocuments(ctx context.Context, documents []document.Docu return nil } -func (p *Index) IsEmpty(ctx context.Context) (bool, error) { +func (q *Index) IsEmpty(ctx context.Context) (bool, error) { - err := p.createCollectionIfRequired(ctx) + err := q.createCollectionIfRequired(ctx) if err != nil { return true, fmt.Errorf("%s: %w", index.ErrInternal, err) } res := &qdrantresponse.CollectionCollectInfo{} - err = p.qdrantClient.CollectionCollectInfo( + err = q.qdrantClient.CollectionCollectInfo( ctx, &qdrantrequest.CollectionCollectInfo{ - CollectionName: p.collectionName, + CollectionName: q.collectionName, }, res, ) @@ -179,7 +179,11 @@ func (q *Index) Query(ctx context.Context, query string, opts ...option.Option) return index.FilterSearchResults(searchResults, qdrantOptions.TopK), nil } -func (q *Index) similaritySearch(ctx context.Context, values []float64, opts *option.Options) ([]qdrantresponse.PointSearchResult, error) { +func (q *Index) similaritySearch( + ctx context.Context, + values []float64, + opts *option.Options, +) ([]qdrantresponse.PointSearchResult, error) { if opts.Filter == nil { opts.Filter = qdrantrequest.Filter{} @@ -205,7 +209,11 @@ func (q *Index) similaritySearch(ctx context.Context, values []float64, opts *op return res.Result, nil } -func (q *Index) query(ctx context.Context, query string, opts *option.Options) ([]qdrantresponse.PointSearchResult, error) { +func (q *Index) query( + ctx context.Context, + query string, + opts *option.Options, +) ([]qdrantresponse.PointSearchResult, error) { embeddings, err := q.embedder.Embed(ctx, []string{query}) if err != nil { return nil, err @@ -336,7 +344,10 @@ func buildQdrantPointsFromEmbeddingsAndDocuments( return vectors, nil } -func buildSearchResultsFromQdrantMatches(matches []qdrantresponse.PointSearchResult, includeContent bool) index.SearchResults { +func buildSearchResultsFromQdrantMatches( + matches []qdrantresponse.PointSearchResult, + includeContent bool, +) index.SearchResults { searchResults := make([]index.SearchResult, len(matches)) for i, match := range matches { diff --git a/index/simpleVectorIndex/simpleVectorIndex.go b/index/simpleVectorIndex/simpleVectorIndex.go index 621e0739..749830c2 100644 --- a/index/simpleVectorIndex/simpleVectorIndex.go +++ b/index/simpleVectorIndex/simpleVectorIndex.go @@ -34,7 +34,7 @@ type Index struct { embedder index.Embedder } -type SimpleVectorIndexFilterFn func([]index.SearchResult) []index.SearchResult +type FilterFn func([]index.SearchResult) []index.SearchResult func New(name string, outputPath string, embedder index.Embedder) *Index { simpleVectorIndex := &Index{ @@ -109,7 +109,7 @@ func (s Index) save() error { return err } - return os.WriteFile(s.database(), jsonContent, 0644) + return os.WriteFile(s.database(), jsonContent, 0600) } func (s *Index) load() error { @@ -144,6 +144,7 @@ func (s *Index) IsEmpty() (bool, error) { } func (s *Index) Add(ctx context.Context, item *index.Data) error { + _ = ctx err := s.load() if err != nil { return fmt.Errorf("%s: %w", index.ErrInternal, err) @@ -214,7 +215,7 @@ func (s *Index) similaritySearch( embedding embedder.Embedding, opts *option.Options, ) (index.SearchResults, error) { - + _ = ctx scores := s.cosineSimilarityBatch(embedding) searchResults := make([]index.SearchResult, len(scores)) @@ -231,7 +232,7 @@ func (s *Index) similaritySearch( } if opts.Filter != nil { - searchResults = opts.Filter.(SimpleVectorIndexFilterFn)(searchResults) + searchResults = opts.Filter.(FilterFn)(searchResults) } return index.FilterSearchResults(searchResults, opts.TopK), nil diff --git a/llm/cache/cache.go b/llm/cache/cache.go index 4782d690..bee8c598 100644 --- a/llm/cache/cache.go +++ b/llm/cache/cache.go @@ -34,7 +34,7 @@ type Cache struct { scoreThreshold float64 } -type CacheResult struct { +type Result struct { Answer []string Embedding []float64 } @@ -58,7 +58,7 @@ func (c *Cache) WithScoreThreshold(scoreThreshold float64) *Cache { return c } -func (c *Cache) Get(ctx context.Context, query string) (*CacheResult, error) { +func (c *Cache) Get(ctx context.Context, query string) (*Result, error) { embedding, err := c.embedder.Embed(ctx, []string{query}) if err != nil { @@ -72,7 +72,7 @@ func (c *Cache) Get(ctx context.Context, query string) (*CacheResult, error) { answers, cacheHit := c.extractResults(results) if cacheHit { - return &CacheResult{ + return &Result{ Answer: answers, Embedding: embedding[0], }, nil diff --git a/llm/cohere/cohere.go b/llm/cohere/cohere.go index 51ce488f..4f326836 100644 --- a/llm/cohere/cohere.go +++ b/llm/cohere/cohere.go @@ -77,9 +77,9 @@ func (c *Cohere) WithVerbose(verbose bool) *Cohere { } // WithStop sets the stop sequences to use for the LLM -func (o *Cohere) WithStop(stop []string) *Cohere { - o.stop = stop - return o +func (c *Cohere) WithStop(stop []string) *Cohere { + c.stop = stop + return c } // Completion returns the completion for the given prompt @@ -87,7 +87,7 @@ func (c *Cohere) Completion(ctx context.Context, prompt string) (string, error) resp := &response.Generate{} err := c.client.Generate( - context.Background(), + ctx, &request.Generate{ Prompt: prompt, Temperature: &c.temperature, @@ -117,5 +117,7 @@ func (c *Cohere) Completion(ctx context.Context, prompt string) (string, error) // Chat is not implemented func (c *Cohere) Chat(ctx context.Context, prompt *chat.Chat) (string, error) { + _ = ctx + _ = prompt return "", fmt.Errorf("not implemented") } diff --git a/llm/huggingface/huggingface.go b/llm/huggingface/huggingface.go index 4df44a85..e9d7f7e2 100644 --- a/llm/huggingface/huggingface.go +++ b/llm/huggingface/huggingface.go @@ -13,15 +13,15 @@ const ( ErrHuggingFaceCompletion = "huggingface completion error" ) -type HuggingFaceMode int +type Mode int const ( - HuggingFaceModeCoversational HuggingFaceMode = iota - HuggingFaceModeTextGeneration + ModeCoversational Mode = iota + ModeTextGeneration ) type HuggingFace struct { - mode HuggingFaceMode + mode Mode token string model string temperature float32 @@ -35,7 +35,7 @@ type HuggingFace struct { func New(model string, temperature float32, verbose bool) *HuggingFace { return &HuggingFace{ - mode: HuggingFaceModeCoversational, + mode: ModeCoversational, token: os.Getenv("HUGGING_FACE_HUB_TOKEN"), model: model, temperature: temperature, @@ -93,7 +93,7 @@ func (h *HuggingFace) WithTopP(topP float32) *HuggingFace { } // WithMode sets the mode to use for the LLM -func (h *HuggingFace) WithMode(mode HuggingFaceMode) *HuggingFace { +func (h *HuggingFace) WithMode(mode Mode) *HuggingFace { h.mode = mode return h } @@ -111,12 +111,12 @@ func (h *HuggingFace) Completion(ctx context.Context, prompt string) (string, er var outputs []string var err error switch h.mode { - case HuggingFaceModeTextGeneration: + case ModeTextGeneration: outputs, err = h.textgenerationCompletion(ctx, []string{prompt}) if err == nil { output = outputs[0] } - case HuggingFaceModeCoversational: + case ModeCoversational: fallthrough default: output, err = h.conversationalCompletion(ctx, prompt) @@ -135,9 +135,9 @@ func (h *HuggingFace) BatchCompletion(ctx context.Context, prompts []string) ([] var outputs []string var err error switch h.mode { - case HuggingFaceModeTextGeneration: + case ModeTextGeneration: outputs, err = h.textgenerationCompletion(ctx, prompts) - case HuggingFaceModeCoversational: + case ModeCoversational: fallthrough default: return nil, fmt.Errorf("batch completion not supported for conversational mode") diff --git a/llm/huggingface/textgeneration.go b/llm/huggingface/textgeneration.go index c5ae49fe..82c79099 100644 --- a/llm/huggingface/textgeneration.go +++ b/llm/huggingface/textgeneration.go @@ -65,7 +65,8 @@ func (h *HuggingFace) textgenerationCompletion(ctx context.Context, prompts []st return nil, err } if len(tgrespsRaw) != len(request.Inputs) { - return nil, fmt.Errorf("%s: expected %d responses, got %d; response=%s", ErrHuggingFaceCompletion, len(request.Inputs), len(tgrespsRaw), string(respBody)) + return nil, fmt.Errorf("%s: expected %d responses, got %d; response=%s", ErrHuggingFaceCompletion, + len(request.Inputs), len(tgrespsRaw), string(respBody)) } outputs := make([]string, len(request.Inputs)) diff --git a/llm/llamacpp/llamacpp.go b/llm/llamacpp/llamacpp.go index 35612a82..96ae0bb1 100644 --- a/llm/llamacpp/llamacpp.go +++ b/llm/llamacpp/llamacpp.go @@ -73,9 +73,15 @@ func (l *Llamacpp) Completion(ctx context.Context, prompt string) (string, error return "", err } - llamacppArgs := []string{"-m", l.modelPath, "-p", prompt, "-n", fmt.Sprintf("%d", l.maxTokens), "--temp", fmt.Sprintf("%.2f", l.temperature)} + llamacppArgs := []string{ + "-m", l.modelPath, + "-p", prompt, + "-n", fmt.Sprintf("%d", l.maxTokens), + "--temp", fmt.Sprintf("%.2f", l.temperature), + } llamacppArgs = append(llamacppArgs, l.llamacppArgs...) + //nolint:gosec out, err := exec.CommandContext(ctx, l.llamacppPath, llamacppArgs...).Output() if err != nil { return "", err @@ -90,5 +96,7 @@ func (l *Llamacpp) Completion(ctx context.Context, prompt string) (string, error } func (l *Llamacpp) Chat(ctx context.Context, prompt *chat.Chat) (string, error) { + _ = ctx + _ = prompt return "", fmt.Errorf("not implemented") } diff --git a/llm/mock/mock.go b/llm/mock/mock.go index 4bf50314..21106d18 100644 --- a/llm/mock/mock.go +++ b/llm/mock/mock.go @@ -1,3 +1,4 @@ +// nolint // Package llmmock provides a mock implementation of the LLM interface. package llmmock @@ -62,9 +63,9 @@ func (l *LlmMock) Chat(ctx context.Context, prompt *chat.Chat) (string, error) { return output, nil } -type JsonLllMock struct{} +type JSONLllMock struct{} -func (l *JsonLllMock) Completion(ctx context.Context, prompt string) (string, error) { +func (l *JSONLllMock) Completion(ctx context.Context, prompt string) (string, error) { fmt.Printf("User: %s\n", prompt) rand.Seed(time.Now().UnixNano()) @@ -83,7 +84,9 @@ func (l *JsonLllMock) Completion(ctx context.Context, prompt string) (string, er // this function has been generate by AI! ;) func getRandomStrings(number int) []string { - data := []string{"air", "fly", "ball", "kite", "tree", "grass", "house", "ocean", "river", "lake", "road", "bridge", "mountain", "valley", "desert", "flower", "wind", "book", "table", "chair", "television", "computer", "window", "door", "cup", "plate", "spoon", "fork", "knife", "bottle", "glass"} + data := []string{"air", "fly", "ball", "kite", "tree", "grass", "house", "ocean", "river", "lake", "road", + "bridge", "mountain", "valley", "desert", "flower", "wind", "book", "table", "chair", "television", "computer", + "window", "door", "cup", "plate", "spoon", "fork", "knife", "bottle", "glass"} rand.Seed(time.Now().UnixNano()) @@ -96,7 +99,7 @@ func getRandomStrings(number int) []string { return result } -func (l *JsonLllMock) Chat(ctx context.Context, prompt *chat.Chat) (string, error) { +func (l *JSONLllMock) Chat(ctx context.Context, prompt *chat.Chat) (string, error) { messages, err := prompt.ToMessages() if err != nil { diff --git a/llm/openai/function.go b/llm/openai/function.go index 9fcec765..5817318a 100644 --- a/llm/openai/function.go +++ b/llm/openai/function.go @@ -115,7 +115,7 @@ func structAsJSONSchema(v interface{}) (map[string]interface{}, error) { return jsonSchema, nil } -func callFnWithArgumentAsJson(fn interface{}, argumentAsJson string) (string, error) { +func callFnWithArgumentAsJSON(fn interface{}, argumentAsJSON string) (string, error) { // Get the type of the input function fnType := reflect.TypeOf(fn) @@ -135,7 +135,7 @@ func callFnWithArgumentAsJson(fn interface{}, argumentAsJson string) (string, er // Unmarshal the JSON string into an interface{} value var argValue interface{} - err := json.Unmarshal([]byte(argumentAsJson), &argValue) + err := json.Unmarshal([]byte(argumentAsJSON), &argValue) if err != nil { return "", fmt.Errorf("error unmarshaling argument: %s", err) } @@ -176,7 +176,7 @@ func (o *OpenAI) functionCall(response openai.ChatCompletionResponse) (string, e return "", fmt.Errorf("%s: unknown function %s", ErrOpenAIChat, response.Choices[0].Message.FunctionCall.Name) } - resultAsJSON, err := callFnWithArgumentAsJson(fn.Fn, response.Choices[0].Message.FunctionCall.Arguments) + resultAsJSON, err := callFnWithArgumentAsJSON(fn.Fn, response.Choices[0].Message.FunctionCall.Arguments) if err != nil { return "", fmt.Errorf("%s: %w", ErrOpenAIChat, err) } diff --git a/llm/openai/openai.go b/llm/openai/openai.go index 0297a873..ad0704f2 100644 --- a/llm/openai/openai.go +++ b/llm/openai/openai.go @@ -57,8 +57,8 @@ const ( GPT3Babbage Model = openai.GPT3Babbage ) -type OpenAIUsageCallback func(types.Meta) -type OpenAIStreamCallback func(string) +type UsageCallback func(types.Meta) +type StreamCallback func(string) type OpenAI struct { openAIClient *openai.Client @@ -67,7 +67,7 @@ type OpenAI struct { maxTokens int stop []string verbose bool - usageCallback OpenAIUsageCallback + usageCallback UsageCallback functions map[string]Function functionsMaxIterations uint calledFunctionName *string @@ -109,7 +109,7 @@ func (o *OpenAI) WithMaxTokens(maxTokens int) *OpenAI { } // WithUsageCallback sets the usage callback to use for the OpenAI instance. -func (o *OpenAI) WithCallback(callback OpenAIUsageCallback) *OpenAI { +func (o *OpenAI) WithCallback(callback UsageCallback) *OpenAI { o.usageCallback = callback return o } @@ -168,7 +168,7 @@ func NewChat() *OpenAI { // Completion returns a single completion for the given prompt. func (o *OpenAI) Completion(ctx context.Context, prompt string) (string, error) { - var cacheResult *cache.CacheResult + var cacheResult *cache.Result var err error if o.cache != nil { @@ -236,12 +236,12 @@ func (o *OpenAI) BatchCompletion(ctx context.Context, prompts []string) ([]strin } // CompletionStream returns a single completion stream for the given prompt. -func (o *OpenAI) CompletionStream(ctx context.Context, callbackFn OpenAIStreamCallback, prompt string) error { - return o.BatchCompletionStream(ctx, []OpenAIStreamCallback{callbackFn}, []string{prompt}) +func (o *OpenAI) CompletionStream(ctx context.Context, callbackFn StreamCallback, prompt string) error { + return o.BatchCompletionStream(ctx, []StreamCallback{callbackFn}, []string{prompt}) } // BatchCompletionStream returns multiple completion streams for the given prompts. -func (o *OpenAI) BatchCompletionStream(ctx context.Context, callbackFn []OpenAIStreamCallback, prompts []string) error { +func (o *OpenAI) BatchCompletionStream(ctx context.Context, callbackFn []StreamCallback, prompts []string) error { stream, err := o.openAIClient.CreateCompletionStream( ctx, @@ -358,7 +358,7 @@ func (o *OpenAI) Chat(ctx context.Context, prompt *chat.Chat) (string, error) { } // ChatStream returns a single chat stream for the given prompt. -func (o *OpenAI) ChatStream(ctx context.Context, callbackFn OpenAIStreamCallback, prompt *chat.Chat) error { +func (o *OpenAI) ChatStream(ctx context.Context, callbackFn StreamCallback, prompt *chat.Chat) error { messages, err := buildMessages(prompt) if err != nil { diff --git a/loader/csv.go b/loader/csv.go index a5b155d6..2f0dfa7a 100644 --- a/loader/csv.go +++ b/loader/csv.go @@ -36,13 +36,14 @@ func (c *CSVLoader) WithSeparator(separator rune) *CSVLoader { return c } -func (t *CSVLoader) WithTextSplitter(textSplitter TextSplitter) *CSVLoader { +//nolint:revive +func (c *CSVLoader) WithTextSplitter(textSplitter TextSplitter) *CSVLoader { // can't split csv - return t + return c } func (c *CSVLoader) Load(ctx context.Context) ([]document.Document, error) { - + _ = ctx err := c.validate() if err != nil { return nil, err diff --git a/loader/hf_image_to_text.go b/loader/hf_image_to_text.go index e3cde3d2..d2df8723 100644 --- a/loader/hf_image_to_text.go +++ b/loader/hf_image_to_text.go @@ -62,7 +62,7 @@ func (h *HFImageToText) Load(ctx context.Context) ([]document.Document, error) { return nil, fmt.Errorf("%s: %w", ErrorInternal, err) } - responseBytes, err := hfMediaHttpCall(ctx, h.token, h.model, h.mediaFile) + responseBytes, err := hfMediaHTTPCall(ctx, h.token, h.model, h.mediaFile) if err != nil { return nil, fmt.Errorf("%s: %w", ErrorInternal, err) } @@ -96,7 +96,7 @@ func (h *HFImageToText) Load(ctx context.Context) ([]document.Document, error) { return documents, nil } -func hfMediaHttpCall(ctx context.Context, token, model, mediaFile string) ([]byte, error) { +func hfMediaHTTPCall(ctx context.Context, token, model, mediaFile string) ([]byte, error) { buf, err := os.ReadFile(mediaFile) if err != nil { return nil, err @@ -123,7 +123,7 @@ func hfMediaHttpCall(ctx context.Context, token, model, mediaFile string) ([]byt return nil, err } - err = hfCheckHttpResponse(respBody) + err = hfCheckHTTPResponse(respBody) if err != nil { return nil, err } @@ -131,7 +131,7 @@ func hfMediaHttpCall(ctx context.Context, token, model, mediaFile string) ([]byt return respBody, nil } -func hfCheckHttpResponse(respJSON []byte) error { +func hfCheckHTTPResponse(respJSON []byte) error { type apiError struct { Error string `json:"error,omitempty"` diff --git a/loader/hf_speech_recognition.go b/loader/hf_speech_recognition.go index 952849f8..0b97d7da 100644 --- a/loader/hf_speech_recognition.go +++ b/loader/hf_speech_recognition.go @@ -57,7 +57,7 @@ func (h *HFSpeechRecognition) Load(ctx context.Context) ([]document.Document, er return nil, fmt.Errorf("%s: %w", ErrorInternal, err) } - responseBytes, err := hfMediaHttpCall(ctx, h.token, h.model, h.mediaFile) + responseBytes, err := hfMediaHTTPCall(ctx, h.token, h.model, h.mediaFile) if err != nil { return nil, fmt.Errorf("%s: %w", ErrorInternal, err) } diff --git a/loader/libreoffice.go b/loader/libreoffice.go index bd116cfd..066207f8 100644 --- a/loader/libreoffice.go +++ b/loader/libreoffice.go @@ -73,6 +73,7 @@ func (l *LibreOfficeLoader) loadFile(ctx context.Context) ([]document.Document, libreOfficeArgs := append(l.libreOfficeArgs, l.filename) + //nolint:gosec out, err := exec.CommandContext(ctx, l.libreOfficePath, libreOfficeArgs...).Output() if err != nil { return nil, err diff --git a/loader/pdf_to_text.go b/loader/pdf_to_text.go index 7e87e76d..ee0d7851 100644 --- a/loader/pdf_to_text.go +++ b/loader/pdf_to_text.go @@ -71,6 +71,7 @@ func (p *PDFLoader) Load(ctx context.Context) ([]document.Document, error) { } func (p *PDFLoader) loadFile(ctx context.Context) ([]document.Document, error) { + //nolint:gosec out, err := exec.CommandContext(ctx, p.pdfToTextPath, p.path, "-").Output() if err != nil { return nil, err diff --git a/loader/tesseract.go b/loader/tesseract.go index 07c984c1..97f7d2b0 100644 --- a/loader/tesseract.go +++ b/loader/tesseract.go @@ -74,6 +74,7 @@ func (l *TesseractLoader) loadFile(ctx context.Context) ([]document.Document, er tesseractArgs := []string{l.filename, "stdout"} tesseractArgs = append(tesseractArgs, l.tesseractArgs...) + //nolint:gosec out, err := exec.CommandContext(ctx, l.tesseractPath, tesseractArgs...).Output() if err != nil { return nil, err diff --git a/loader/text.go b/loader/text.go index 900c0bcd..0f60cbe2 100644 --- a/loader/text.go +++ b/loader/text.go @@ -29,7 +29,7 @@ func (t *TextLoader) WithTextSplitter(textSplitter TextSplitter) *TextLoader { } func (t *TextLoader) Load(ctx context.Context) ([]document.Document, error) { - + _ = ctx err := t.validate() if err != nil { return nil, err diff --git a/loader/whispercpp.go b/loader/whispercpp.go index 9a8b3105..22ae0e9e 100644 --- a/loader/whispercpp.go +++ b/loader/whispercpp.go @@ -11,7 +11,7 @@ import ( "github.com/henomis/lingoose/types" ) -type whisperCppLoader struct { +type WhisperCppLoader struct { loader Loader ffmpegPath string @@ -24,8 +24,8 @@ type whisperCppLoader struct { var whisperSanitizeRegexp = regexp.MustCompile(`\[.*?\]`) -func NewWhisperCppLoader(filename string) *whisperCppLoader { - return &whisperCppLoader{ +func NewWhisperCppLoader(filename string) *WhisperCppLoader { + return &WhisperCppLoader{ filename: filename, ffmpegPath: "/usr/bin/ffmpeg", ffmpegArgs: []string{"-nostdin", "-f", "wav", "-ar", "16000", "-ac", "1", "-acodec", "pcm_s16le", "-"}, @@ -35,32 +35,32 @@ func NewWhisperCppLoader(filename string) *whisperCppLoader { } } -func (w *whisperCppLoader) WithTextSplitter(textSplitter TextSplitter) *whisperCppLoader { +func (w *WhisperCppLoader) WithTextSplitter(textSplitter TextSplitter) *WhisperCppLoader { w.loader.textSplitter = textSplitter return w } -func (w *whisperCppLoader) WithFfmpegPath(ffmpegPath string) *whisperCppLoader { +func (w *WhisperCppLoader) WithFfmpegPath(ffmpegPath string) *WhisperCppLoader { w.ffmpegPath = ffmpegPath return w } -func (w *whisperCppLoader) WithWhisperCppPath(whisperCppPath string) *whisperCppLoader { +func (w *WhisperCppLoader) WithWhisperCppPath(whisperCppPath string) *WhisperCppLoader { w.whisperCppPath = whisperCppPath return w } -func (w *whisperCppLoader) WithModel(whisperCppModelPath string) *whisperCppLoader { +func (w *WhisperCppLoader) WithModel(whisperCppModelPath string) *WhisperCppLoader { w.whisperCppModelPath = whisperCppModelPath return w } -func (w *whisperCppLoader) WithArgs(whisperCppArgs []string) *whisperCppLoader { +func (w *WhisperCppLoader) WithArgs(whisperCppArgs []string) *WhisperCppLoader { w.whisperCppArgs = whisperCppArgs return w } -func (w *whisperCppLoader) Load(ctx context.Context) ([]document.Document, error) { +func (w *WhisperCppLoader) Load(ctx context.Context) ([]document.Document, error) { err := isFile(w.ffmpegPath) if err != nil { @@ -98,15 +98,17 @@ func (w *whisperCppLoader) Load(ctx context.Context) ([]document.Document, error return documents, nil } -func (w *whisperCppLoader) convertAndTrascribe(ctx context.Context) (string, error) { +func (w *WhisperCppLoader) convertAndTrascribe(ctx context.Context) (string, error) { ffmpegArgs := []string{"-i", w.filename} ffmpegArgs = append(ffmpegArgs, w.ffmpegArgs...) + //nolint:gosec ffmpeg := exec.CommandContext(ctx, w.ffmpegPath, ffmpegArgs...) whisperCppArgs := []string{"-m", w.whisperCppModelPath, "-nt", "-f", "-"} whisperCppArgs = append(w.whisperCppArgs, whisperCppArgs...) + //nolint:gosec whispercpp := exec.CommandContext(ctx, w.whisperCppPath, whisperCppArgs...) pipeReader, pipeWriter := io.Pipe() diff --git a/loader/youtube-dl.go b/loader/youtube-dl.go index b2f4c797..a915a60c 100644 --- a/loader/youtube-dl.go +++ b/loader/youtube-dl.go @@ -93,6 +93,7 @@ func (y *YoutubeDLLoader) loadVideo(ctx context.Context) ([]document.Document, e y.path, } + //nolint:gosec cmd := exec.CommandContext(ctx, y.youtubeDlPath, args...) cmd.Stderr = os.Stderr cmd.Stdout = os.Stdout diff --git a/memory/ram/ram.go b/memory/ram/ram.go index b0325712..3ebb232a 100644 --- a/memory/ram/ram.go +++ b/memory/ram/ram.go @@ -11,6 +11,7 @@ var ( ErrObjectNotFound = errors.New("object not found") ) +//nolint:revive type Ram struct { memory types.M } diff --git a/pipeline/pipeline.go b/pipeline/pipeline.go index 25f2c170..b12581f6 100644 --- a/pipeline/pipeline.go +++ b/pipeline/pipeline.go @@ -35,12 +35,12 @@ type Pipe interface { Run(ctx context.Context, input types.M) (types.M, error) } -type PipelineCallback func(ctx context.Context, values types.M) (types.M, error) +type Callback func(ctx context.Context, values types.M) (types.M, error) type Pipeline struct { pipes map[int]Pipe - preCallbacks map[int]PipelineCallback - postCallbacks map[int]PipelineCallback + preCallbacks map[int]Callback + postCallbacks map[int]Callback } func New(pipes ...Pipe) *Pipeline { @@ -55,9 +55,9 @@ func New(pipes ...Pipe) *Pipeline { } } -func (p *Pipeline) WithPreCallbacks(callbacks ...PipelineCallback) *Pipeline { +func (p *Pipeline) WithPreCallbacks(callbacks ...Callback) *Pipeline { - p.preCallbacks = make(map[int]PipelineCallback) + p.preCallbacks = make(map[int]Callback) for i, callback := range callbacks { p.preCallbacks[i] = callback } @@ -65,9 +65,9 @@ func (p *Pipeline) WithPreCallbacks(callbacks ...PipelineCallback) *Pipeline { return p } -func (p *Pipeline) WithPostCallbacks(callbacks ...PipelineCallback) *Pipeline { +func (p *Pipeline) WithPostCallbacks(callbacks ...Callback) *Pipeline { - p.postCallbacks = make(map[int]PipelineCallback) + p.postCallbacks = make(map[int]Callback) for i, callback := range callbacks { p.postCallbacks[i] = callback } diff --git a/pipeline/qa/qa.go b/pipeline/qa/qa.go index c18a3efe..f7cebdfe 100644 --- a/pipeline/qa/qa.go +++ b/pipeline/qa/qa.go @@ -18,6 +18,7 @@ const ( Don't add any information that is not in the context. If you don't know the answer, just say 'I don't know'.` + //nolint:lll qaTubeUserPromptTemplate = "Based on the following context answer to the question.\n\nContext:\n{{.context}}\n\nQuestion: {{.query}}" ) @@ -61,9 +62,9 @@ func New(llmEngine pipeline.LlmEngine) *QAPipeline { } } -func (p *QAPipeline) WithPrompt(chat *chat.Chat) *QAPipeline { +func (q *QAPipeline) WithPrompt(chat *chat.Chat) *QAPipeline { llm := pipeline.Llm{ - LlmEngine: p.llmEngine, + LlmEngine: q.llmEngine, LlmMode: pipeline.LlmModeChat, Chat: chat, } @@ -93,14 +94,14 @@ func (q *QAPipeline) Query(ctx context.Context, query string, opts ...indexoptio return q.Run(ctx, query, docs.ToDocuments()) } -func (t *QAPipeline) Run(ctx context.Context, query string, documents []document.Document) (types.M, error) { +func (q *QAPipeline) Run(ctx context.Context, query string, documents []document.Document) (types.M, error) { content := "" for _, document := range documents { content += document.Content + "\n" } - return t.pipeline.Run( + return q.pipeline.Run( ctx, types.M{ "query": query, diff --git a/pipeline/sql/mysql.go b/pipeline/sql/mysql.go index 44d71210..a6288128 100644 --- a/pipeline/sql/mysql.go +++ b/pipeline/sql/mysql.go @@ -6,6 +6,7 @@ import ( "strings" ) +//nolint:lll const mysqlDataSourcePromptTemplate = "\n" + "You are a MySQL expert. Given an input question, first create a syntactically correct MySQL query to run, then look at the results of the query and return the answer to the input question.\n" + "Unless the user specifies in the question a specific number of examples to obtain, query for at most {top_k} results using the LIMIT clause as per MySQL. You can order the results to return the most informative data in the database.\n" + @@ -17,6 +18,7 @@ func getMySQLSchema(db *sql.DB, dbName string) (string, error) { var schema string // Retrieve table names + //nolint:lll rows, err := db.Query(fmt.Sprintf("SELECT table_name FROM information_schema.tables WHERE table_schema = '%s'", dbName)) if err != nil { return "", err @@ -79,6 +81,7 @@ func getMySQLSchema(db *sql.DB, dbName string) (string, error) { } // Retrieve foreign key information + //nolint:lll fks, err := db.Query(fmt.Sprintf("SELECT constraint_name, column_name, referenced_table_name, referenced_column_name FROM information_schema.key_column_usage WHERE table_schema = '%s' AND table_name = '%s' AND referenced_table_name IS NOT NULL", dbName, tableName)) if err != nil { return "", err @@ -98,7 +101,8 @@ func getMySQLSchema(db *sql.DB, dbName string) (string, error) { return "", err } - fkDef := fmt.Sprintf(" CONSTRAINT %s FOREIGN KEY (%s) REFERENCES %s (%s)", constraintName, columnName, referencedTableName, referencedColumnName) + fkDef := fmt.Sprintf(" CONSTRAINT %s FOREIGN KEY (%s) REFERENCES %s (%s)", + constraintName, columnName, referencedTableName, referencedColumnName) fkDefs = append(fkDefs, fkDef) } diff --git a/pipeline/sql/sql.go b/pipeline/sql/sql.go index 37e77638..89f4babb 100644 --- a/pipeline/sql/sql.go +++ b/pipeline/sql/sql.go @@ -52,9 +52,13 @@ type llmWithStop interface { SetStop([]string) } -type SqlDDLFn func() (string, error) +type SQLDDLFn func() (string, error) -func New(llmEngine pipeline.LlmEngine, dataSourceType DataSourceType, dataSourceName string) (*pipeline.Pipeline, error) { +func New( + llmEngine pipeline.LlmEngine, + dataSourceType DataSourceType, + dataSourceName string, +) (*pipeline.Pipeline, error) { memory := types.M{} @@ -89,7 +93,7 @@ func New(llmEngine pipeline.LlmEngine, dataSourceType DataSourceType, dataSource // ********** QUERY TUBE ************// query := pipeline.NewTube(queryLLM).WithDecoder(decoder.NewRegExDecoder(sqlQueryRegexExpr)) - preQueryCB := pipeline.PipelineCallback(func(ctx context.Context, input types.M) (types.M, error) { + preQueryCB := pipeline.Callback(func(ctx context.Context, input types.M) (types.M, error) { if q, ok := input[questionKey].(string); ok { memory[questionKey] = q } @@ -97,7 +101,7 @@ func New(llmEngine pipeline.LlmEngine, dataSourceType DataSourceType, dataSource return preQueryCBFn(input, sqlDDL) }) - postQueryCB := pipeline.PipelineCallback(func(ctx context.Context, output types.M) (types.M, error) { + postQueryCB := pipeline.Callback(func(ctx context.Context, output types.M) (types.M, error) { return postQueryCBFn(output, db, sqlDDL, memory) }) // ********** END QUERY TUBE ************// @@ -113,11 +117,11 @@ func New(llmEngine pipeline.LlmEngine, dataSourceType DataSourceType, dataSource refine := pipeline.NewTube(refineLLM).WithDecoder(decoder.NewRegExDecoder(sqlQueryRegexExpr)) - preRefineCB := pipeline.PipelineCallback(func(ctx context.Context, input types.M) (types.M, error) { + preRefineCB := pipeline.Callback(func(ctx context.Context, input types.M) (types.M, error) { return preRefineCBFn(input, sqlDDL, memory) }) - postRefineCBFn := pipeline.PipelineCallback(func(ctx context.Context, output types.M) (types.M, error) { + postRefineCBFn := pipeline.Callback(func(ctx context.Context, output types.M) (types.M, error) { return postRefineCBFn(output, db, sqlDDL, memory) }) @@ -135,11 +139,11 @@ func New(llmEngine pipeline.LlmEngine, dataSourceType DataSourceType, dataSource describe := pipeline.NewTube(describeLLM) - preDescribeCB := pipeline.PipelineCallback(func(ctx context.Context, input types.M) (types.M, error) { + preDescribeCB := pipeline.Callback(func(ctx context.Context, input types.M) (types.M, error) { return preDescribeCBFn(input, sqlDDL, memory) }) - postDescribeCB := pipeline.PipelineCallback(func(ctx context.Context, output types.M) (types.M, error) { + postDescribeCB := pipeline.Callback(func(ctx context.Context, output types.M) (types.M, error) { output[sqlQueryKey] = memory[sqlQueryKey] output[sqlResultKey] = memory[sqlResultKey] return output, nil @@ -147,7 +151,9 @@ func New(llmEngine pipeline.LlmEngine, dataSourceType DataSourceType, dataSource // ********** END DESCRIBE ************// - sqlPipeline := pipeline.New(query, refine, describe).WithPreCallbacks(preQueryCB, preRefineCB, preDescribeCB).WithPostCallbacks(postQueryCB, postRefineCBFn, postDescribeCB) + sqlPipeline := pipeline.New(query, refine, describe). + WithPreCallbacks(preQueryCB, preRefineCB, preDescribeCB). + WithPostCallbacks(postQueryCB, postRefineCBFn, postDescribeCB) return sqlPipeline, nil @@ -181,7 +187,7 @@ func preDescribeCBFn(input types.M, sqlDDL string, memory types.M) (types.M, err } func postQueryCBFn(output types.M, db *sql.DB, sqlDDL string, memory types.M) (types.M, error) { - + _ = sqlDDL sqlQueryMatches, ok := output[types.DefaultOutputKey].([]string) if !ok || len(sqlQueryMatches) != 1 { return output, nil @@ -192,7 +198,7 @@ func postQueryCBFn(output types.M, db *sql.DB, sqlDDL string, memory types.M) (t output[sqlQueryKey] = sqlQuery memory[sqlQueryKey] = sqlQuery - sqlResult, err := getSqlResult(db, sqlQuery) + sqlResult, err := getSQLResult(db, sqlQuery) memory[sqlResultKey] = sqlResult @@ -208,14 +214,14 @@ func postQueryCBFn(output types.M, db *sql.DB, sqlDDL string, memory types.M) (t } func postRefineCBFn(output types.M, db *sql.DB, sqlDDL string, memory types.M) (types.M, error) { - + _ = sqlDDL sqlQueryMatches, ok := output[types.DefaultOutputKey].([]string) if !ok || len(sqlQueryMatches) != 1 { return output, nil } sqlQuery := sqlQueryMatches[0] - sqlResult, err := getSqlResult(db, sqlQuery) + sqlResult, err := getSQLResult(db, sqlQuery) output[sqlResultKey] = sqlResult output[sqlQueryKey] = sqlQuery @@ -232,7 +238,7 @@ func postRefineCBFn(output types.M, db *sql.DB, sqlDDL string, memory types.M) ( return output, nil } -func getSqlResult(db *sql.DB, query string) (string, error) { +func getSQLResult(db *sql.DB, query string) (string, error) { rows, err := db.Query(query) if err != nil { diff --git a/pipeline/sql/sqlite.go b/pipeline/sql/sqlite.go index 5636582f..49cd03f4 100644 --- a/pipeline/sql/sqlite.go +++ b/pipeline/sql/sqlite.go @@ -6,6 +6,7 @@ import ( "strings" ) +//nolint:lll const sqliteDataSourcePromptTemplate = ` You are a SQLite expert. Given an input question, first create a syntactically correct SQLite query to run, then look at the results of the query and return the answer to the input question. Unless the user specifies in the question a specific number of examples to obtain, query for at most {{.top_k}} results using the LIMIT clause as per SQLite. You can order the results to return the most informative data in the database. diff --git a/pipeline/summarize/summarize.go b/pipeline/summarize/summarize.go index 71041776..05cbd2cd 100644 --- a/pipeline/summarize/summarize.go +++ b/pipeline/summarize/summarize.go @@ -33,7 +33,7 @@ func New(llmEngine pipeline.LlmEngine, loader Loader) *pipeline.Pipeline { } summary := pipeline.NewTube(summarizeLLM) - preSummaryCB := pipeline.PipelineCallback(func(ctx context.Context, input types.M) (types.M, error) { + preSummaryCB := pipeline.Callback(func(ctx context.Context, input types.M) (types.M, error) { var err error docs, err = loader.Load(ctx) if err != nil { @@ -49,7 +49,7 @@ func New(llmEngine pipeline.LlmEngine, loader Loader) *pipeline.Pipeline { "text": docs[iterator].Content, }, nil }) - postSummaryCB := pipeline.PipelineCallback(func(ctx context.Context, output types.M) (types.M, error) { + postSummaryCB := pipeline.Callback(func(ctx context.Context, output types.M) (types.M, error) { remainigDocs-- iterator++ if remainigDocs == 0 { @@ -60,12 +60,12 @@ func New(llmEngine pipeline.LlmEngine, loader Loader) *pipeline.Pipeline { }) refine := pipeline.NewTube(refineLLM) - preRefineCB := pipeline.PipelineCallback(func(ctx context.Context, input types.M) (types.M, error) { + preRefineCB := pipeline.Callback(func(ctx context.Context, input types.M) (types.M, error) { input["text"] = docs[iterator].Content return input, nil }) - postRefineCB := pipeline.PipelineCallback(func(ctx context.Context, output types.M) (types.M, error) { + postRefineCB := pipeline.Callback(func(ctx context.Context, output types.M) (types.M, error) { remainigDocs-- iterator++ if remainigDocs == 0 { @@ -76,7 +76,8 @@ func New(llmEngine pipeline.LlmEngine, loader Loader) *pipeline.Pipeline { return output, nil }) - summarizePipeline := pipeline.New(summary, refine).WithPreCallbacks(preSummaryCB, preRefineCB).WithPostCallbacks(postSummaryCB, postRefineCB) + summarizePipeline := pipeline.New(summary, refine). + WithPreCallbacks(preSummaryCB, preRefineCB).WithPostCallbacks(postSummaryCB, postRefineCB) return summarizePipeline diff --git a/pipeline/tube.go b/pipeline/tube.go index 0a456a70..9e694850 100644 --- a/pipeline/tube.go +++ b/pipeline/tube.go @@ -49,7 +49,7 @@ func (t *Tube) WithDecoder(decoder Decoder) *Tube { // Run execute the step and return the output. // The prompt is formatted with the input and the output of the prompt is used as input for the LLM. // If the step has a memory, the output is stored in the memory. -func (s *Tube) Run(ctx context.Context, input types.M) (types.M, error) { +func (t *Tube) Run(ctx context.Context, input types.M) (types.M, error) { if input == nil { input = types.M{} @@ -60,22 +60,22 @@ func (s *Tube) Run(ctx context.Context, input types.M) (types.M, error) { return nil, fmt.Errorf("%s: %w", ErrDecoding, err) } - if s.memory != nil { - input = mergeMaps(input, s.memory.All()) + if t.memory != nil { + input = mergeMaps(input, t.memory.All()) } - response, err := s.executeLLM(ctx, input) + response, err := t.executeLLM(ctx, input) if err != nil { return nil, fmt.Errorf("%s: %w", ErrLLMExecution, err) } - decodedOutput, err := s.decoder.Decode(response) + decodedOutput, err := t.decoder.Decode(response) if err != nil { return nil, fmt.Errorf("%s: %w", ErrDecoding, err) } - if s.memory != nil { - err = s.memory.Set(s.namespace, decodedOutput) + if t.memory != nil { + err = t.memory.Set(t.namespace, decodedOutput) if err != nil { return nil, err } @@ -85,36 +85,36 @@ func (s *Tube) Run(ctx context.Context, input types.M) (types.M, error) { } -func (s *Tube) executeLLM(ctx context.Context, input types.M) (string, error) { - if s.llm.LlmMode == LlmModeCompletion { - return s.executeLLMCompletion(ctx, input) - } else if s.llm.LlmMode == LlmModeChat { - return s.executeLLMChat(ctx, input) +func (t *Tube) executeLLM(ctx context.Context, input types.M) (string, error) { + if t.llm.LlmMode == LlmModeCompletion { + return t.executeLLMCompletion(ctx, input) + } else if t.llm.LlmMode == LlmModeChat { + return t.executeLLMChat(ctx, input) } return "", ErrInvalidLmmMode } -func (s *Tube) executeLLMCompletion(ctx context.Context, input types.M) (string, error) { - err := s.llm.Prompt.Format(input) +func (t *Tube) executeLLMCompletion(ctx context.Context, input types.M) (string, error) { + err := t.llm.Prompt.Format(input) if err != nil { return "", err } - if s.history != nil { - err = s.history.Add(s.llm.Prompt.String(), nil) + if t.history != nil { + err = t.history.Add(t.llm.Prompt.String(), nil) if err != nil { return "", err } } - response, err := s.llm.LlmEngine.Completion(ctx, s.llm.Prompt.String()) + response, err := t.llm.LlmEngine.Completion(ctx, t.llm.Prompt.String()) if err != nil { return "", err } - if s.history != nil { - err = s.history.Add(response, nil) + if t.history != nil { + err = t.history.Add(response, nil) if err != nil { return "", err } @@ -123,16 +123,16 @@ func (s *Tube) executeLLMCompletion(ctx context.Context, input types.M) (string, return response, nil } -func (s *Tube) executeLLMChat(ctx context.Context, input types.M) (string, error) { +func (t *Tube) executeLLMChat(ctx context.Context, input types.M) (string, error) { - for _, promptMessage := range s.llm.Chat.PromptMessages() { + for _, promptMessage := range t.llm.Chat.PromptMessages() { err := promptMessage.Prompt.Format(input) if err != nil { return "", err } - if s.history != nil { - err = s.history.Add( + if t.history != nil { + err = t.history.Add( promptMessage.Prompt.String(), types.Meta{ "role": promptMessage.Type, @@ -144,13 +144,13 @@ func (s *Tube) executeLLMChat(ctx context.Context, input types.M) (string, error } } - response, err := s.llm.LlmEngine.Chat(ctx, s.llm.Chat) + response, err := t.llm.LlmEngine.Chat(ctx, t.llm.Chat) if err != nil { return "", err } - if s.history != nil { - err = s.history.Add( + if t.history != nil { + err = t.history.Add( response, types.Meta{ "role": chat.MessageTypeAssistant, diff --git a/prompt/prompt.go b/prompt/prompt.go index 238114f4..6de91f00 100644 --- a/prompt/prompt.go +++ b/prompt/prompt.go @@ -25,6 +25,7 @@ func New(text string) *Prompt { } func (p *Prompt) Format(input types.M) error { + _ = input return nil } diff --git a/prompt/template.go b/prompt/template.go index 659ac4f6..10c97b36 100644 --- a/prompt/template.go +++ b/prompt/template.go @@ -62,22 +62,22 @@ func (t *Template) Format(input types.M) error { return nil } -func (p *Template) String() string { - return p.value +func (t *Template) String() string { + return t.value } -func (p *Template) initTemplateEngine() error { +func (t *Template) initTemplateEngine() error { - if p.templateEngine != nil { + if t.templateEngine != nil { return nil } - templateEngine, err := texttemplate.New("prompt").Option("missingkey=zero").Parse(p.template) + templateEngine, err := texttemplate.New("prompt").Option("missingkey=zero").Parse(t.template) if err != nil { return fmt.Errorf("%s: %w", ErrTemplateEngine, err) } - p.templateEngine = templateEngine + t.templateEngine = templateEngine return nil } diff --git a/prompt/whisper.go b/prompt/whisper.go index b9b6a0a3..188ea533 100644 --- a/prompt/whisper.go +++ b/prompt/whisper.go @@ -25,7 +25,11 @@ type WhisperPrompt struct { audioResponseFormat AudioResponseFormat } -func NewPromptFromAudioFile(ctx context.Context, filePath string, audioResponseFormat AudioResponseFormat) (*WhisperPrompt, error) { +func NewPromptFromAudioFile( + ctx context.Context, + filePath string, + audioResponseFormat AudioResponseFormat, +) (*WhisperPrompt, error) { openAIKey := os.Getenv("OPENAI_API_KEY") if openAIKey == "" { return nil, fmt.Errorf("OPENAI_API_KEY not set") @@ -45,6 +49,7 @@ func (p *WhisperPrompt) WithClient(client *openai.Client) *WhisperPrompt { } func (p *WhisperPrompt) Format(input types.M) error { + _ = input return nil } diff --git a/textsplitter/recursiveTextSplitter.go b/textsplitter/recursiveTextSplitter.go index 1abe3a08..07d00211 100644 --- a/textsplitter/recursiveTextSplitter.go +++ b/textsplitter/recursiveTextSplitter.go @@ -8,7 +8,7 @@ import ( ) var ( - defaultSeparators []string = []string{"\n\n", "\n", " ", ""} + defaultSeparators = []string{"\n\n", "\n", " ", ""} defaultLengthFunction LenFunction = func(s string) int { return len(s) } ) @@ -33,7 +33,9 @@ func (r *RecursiveCharacterTextSplitter) WithSeparators(separators []string) *Re return r } -func (r *RecursiveCharacterTextSplitter) WithLengthFunction(lengthFunction LenFunction) *RecursiveCharacterTextSplitter { +func (r *RecursiveCharacterTextSplitter) WithLengthFunction( + lengthFunction LenFunction, +) *RecursiveCharacterTextSplitter { r.lengthFunction = lengthFunction return r } diff --git a/textsplitter/textsplitter.go b/textsplitter/textsplitter.go index 3dfd3298..5765ce13 100644 --- a/textsplitter/textsplitter.go +++ b/textsplitter/textsplitter.go @@ -13,34 +13,34 @@ type TextSplitter struct { lengthFunction LenFunction } -func (p *TextSplitter) mergeSplits(splits []string, separator string) []string { +func (t *TextSplitter) mergeSplits(splits []string, separator string) []string { docs := make([]string, 0) - current_doc := make([]string, 0) + currentDoc := make([]string, 0) total := 0 for _, d := range splits { - splitLen := p.lengthFunction(d) + splitLen := t.lengthFunction(d) - if total+splitLen+getSLen(current_doc, separator, 0) > p.chunkSize { - if total > p.chunkSize { - log.Printf("Created a chunk of size %d, which is longer than the specified %d", total, p.chunkSize) + if total+splitLen+getSLen(currentDoc, separator, 0) > t.chunkSize { + if total > t.chunkSize { + log.Printf("Created a chunk of size %d, which is longer than the specified %d", total, t.chunkSize) } - if len(current_doc) > 0 { - doc := p.joinDocs(current_doc, separator) + if len(currentDoc) > 0 { + doc := t.joinDocs(currentDoc, separator) if doc != "" { docs = append(docs, doc) } - for (total > p.chunkOverlap) || (getSLen(current_doc, separator, 0) > p.chunkSize) && total > 0 { - total -= p.lengthFunction(current_doc[0]) + getSLen(current_doc, separator, 1) - current_doc = current_doc[1:] + for (total > t.chunkOverlap) || (getSLen(currentDoc, separator, 0) > t.chunkSize) && total > 0 { + total -= t.lengthFunction(currentDoc[0]) + getSLen(currentDoc, separator, 1) + currentDoc = currentDoc[1:] } } } - current_doc = append(current_doc, d) - total += getSLen(current_doc, separator, 1) + currentDoc = append(currentDoc, d) + total += getSLen(currentDoc, separator, 1) total += splitLen } - doc := p.joinDocs(current_doc, separator) + doc := t.joinDocs(currentDoc, separator) if doc != "" { docs = append(docs, doc) } @@ -52,8 +52,8 @@ func (t *TextSplitter) joinDocs(docs []string, separator string) string { return strings.TrimSpace(text) } -func getSLen(current_doc []string, separator string, compareLen int) int { - if len(current_doc) > compareLen { +func getSLen(currentDoc []string, separator string, compareLen int) int { + if len(currentDoc) > compareLen { return len(separator) } diff --git a/transformer/cohere-rerank.go b/transformer/cohere-rerank.go index 7155fa35..f8bd2f21 100644 --- a/transformer/cohere-rerank.go +++ b/transformer/cohere-rerank.go @@ -60,15 +60,17 @@ func (c *CohereRerank) WithModel(model CohereRerankModel) *CohereRerank { return c } -func (c *CohereRerank) Rerank(ctx context.Context, query string, documents []document.Document) ([]document.Document, error) { - +func (c *CohereRerank) Rerank( + ctx context.Context, + query string, documents []document.Document, +) ([]document.Document, error) { if c.topN == defaultCohereRerankTopN { c.topN = len(documents) } resp := &response.Rerank{} err := c.client.Rerank( - context.Background(), + ctx, &request.Rerank{ ReturnDocuments: false, MaxChunksPerDoc: &c.maxChunksPerDoc, @@ -93,7 +95,10 @@ func (c *CohereRerank) documentsToStringSlice(documents []document.Document) []s return strings } -func (c *CohereRerank) rerankDocuments(documents []document.Document, results []model.RerankResult) []document.Document { +func (c *CohereRerank) rerankDocuments( + documents []document.Document, + results []model.RerankResult, +) []document.Document { rerankedDocuments := make([]document.Document, 0) for _, result := range results { diff --git a/transformer/visual-question-answering.go b/transformer/visual-question-answering.go index 4a5f9936..2a15abf5 100644 --- a/transformer/visual-question-answering.go +++ b/transformer/visual-question-answering.go @@ -61,7 +61,7 @@ func (v *VisualQuestionAnswering) WithImage(mediaFile string) *VisualQuestionAns func (v *VisualQuestionAnswering) Transform(ctx context.Context, input string, all bool) (any, error) { - respJSON, err := hfVisualQuestionAnsweringHttpCall(ctx, v.token, v.model, v.mediaFile, input) + respJSON, err := hfVisualQuestionAnsweringHTTPCall(ctx, v.token, v.model, v.mediaFile, input) if err != nil { return "", err } @@ -79,7 +79,7 @@ func (v *VisualQuestionAnswering) Transform(ctx context.Context, input string, a return resp[0].Answer, nil } -func hfVisualQuestionAnsweringHttpCall(ctx context.Context, token, model, mediaFile, question string) ([]byte, error) { +func hfVisualQuestionAnsweringHTTPCall(ctx context.Context, token, model, mediaFile, question string) ([]byte, error) { var inputs VisualQuestionAnsweringRequest @@ -119,7 +119,7 @@ func hfVisualQuestionAnsweringHttpCall(ctx context.Context, token, model, mediaF return nil, err } - err = hfCheckHttpResponse(respBody) + err = hfCheckHTTPResponse(respBody) if err != nil { return nil, err } @@ -127,7 +127,7 @@ func hfVisualQuestionAnsweringHttpCall(ctx context.Context, token, model, mediaF return respBody, nil } -func hfCheckHttpResponse(respJSON []byte) error { +func hfCheckHTTPResponse(respJSON []byte) error { type apiError struct { Error string `json:"error,omitempty"` From fc33fec2bb74e3c42194af8bcca76eec7b3ea434 Mon Sep 17 00:00:00 2001 From: Simone Vellei Date: Fri, 15 Sep 2023 01:05:47 +0200 Subject: [PATCH 2/9] chore: whitespace lint --- .golangci.yml | 2 +- chat/chat_test.go | 1 - document/document.go | 1 - embedder/embedding.go | 1 - embedder/huggingface/feature_extraction.go | 1 - embedder/huggingface/http.go | 1 - embedder/llamacpp/llamacpp.go | 3 --- embedder/openai/math.go | 1 - embedder/openai/openai.go | 10 ---------- embedder/openai/openai_test.go | 1 - embedder/openai/token.go | 1 - index/pinecone/pinecone.go | 16 ---------------- index/qdrant/qdrant.go | 13 ------------- index/simpleVectorIndex/simpleVectorIndex.go | 5 ----- llm/cache/cache.go | 1 - llm/cohere/cohere.go | 1 - llm/huggingface/conversational.go | 1 - llm/huggingface/http.go | 1 - llm/huggingface/huggingface.go | 2 -- llm/huggingface/textgeneration.go | 1 - llm/llamacpp/llamacpp.go | 1 - llm/mock/mock.go | 14 ++++++++++---- llm/openai/function.go | 1 - llm/openai/openai.go | 12 ------------ loader/csv.go | 2 -- loader/directory.go | 5 ----- loader/hf_image_to_text.go | 2 -- loader/hf_speech_recognition.go | 1 - loader/libreoffice.go | 2 -- loader/loader.go | 1 - loader/pdf_to_text.go | 2 -- loader/pubmed.go | 3 --- loader/tesseract.go | 2 -- loader/whisper.go | 2 -- loader/whispercpp.go | 2 -- loader/youtube-dl.go | 4 ---- pipeline/pipeline.go | 8 -------- pipeline/qa/qa.go | 3 --- pipeline/splitter.go | 2 -- pipeline/sql/sql.go | 11 ----------- pipeline/sql/sqlite.go | 2 -- pipeline/summarize/summarize.go | 2 -- pipeline/tube.go | 3 --- prompt/template.go | 3 --- textsplitter/recursiveTextSplitter.go | 2 -- textsplitter/textsplitter.go | 1 - transformer/cohere-rerank.go | 1 - transformer/dall-e.go | 2 -- transformer/visual-question-answering.go | 4 ---- types/types.go | 1 - 50 files changed, 11 insertions(+), 154 deletions(-) diff --git a/.golangci.yml b/.golangci.yml index 6f3b6898..1149ef52 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -59,5 +59,5 @@ linters: #- unparam - unused # - varcheck ! deprecated since v1.49.0; replaced by 'unused' - #- whitespace + - whitespace fast: false \ No newline at end of file diff --git a/chat/chat_test.go b/chat/chat_test.go index 3e4b3e0e..b1f72122 100644 --- a/chat/chat_test.go +++ b/chat/chat_test.go @@ -9,7 +9,6 @@ import ( ) func TestChat_ToMessages(t *testing.T) { - prompt1 := prompt.NewPromptTemplate( "You are a helpful assistant that translates {{.input_language}} to {{.output_language}}.").WithInputs( types.M{ diff --git a/document/document.go b/document/document.go index 1ca1ec12..8196fc55 100644 --- a/document/document.go +++ b/document/document.go @@ -28,7 +28,6 @@ func (d *Document) GetContent() string { // GetEnrichedContent returns the document content with the metadata appended func (d *Document) GetEnrichedContent() string { - if d.Metadata == nil { return d.Content } diff --git a/embedder/embedding.go b/embedder/embedding.go index b371a893..478c4b89 100644 --- a/embedder/embedding.go +++ b/embedder/embedding.go @@ -8,7 +8,6 @@ var ( type Embedding []float64 func (e Embedding) ToFloat32() []float32 { - vect := make([]float32, len(e)) for i, v := range e { vect[i] = float32(v) diff --git a/embedder/huggingface/feature_extraction.go b/embedder/huggingface/feature_extraction.go index c6659647..002550ff 100644 --- a/embedder/huggingface/feature_extraction.go +++ b/embedder/huggingface/feature_extraction.go @@ -17,7 +17,6 @@ type featureExtractionRequest struct { } func (h *HuggingFaceEmbedder) featureExtraction(ctx context.Context, text []string) ([]embedder.Embedding, error) { - isTrue := true request := featureExtractionRequest{ Inputs: text, diff --git a/embedder/huggingface/http.go b/embedder/huggingface/http.go index 39d19bf9..0634072d 100644 --- a/embedder/huggingface/http.go +++ b/embedder/huggingface/http.go @@ -12,7 +12,6 @@ import ( const APIBaseURL = "https://api-inference.huggingface.co/pipeline/feature-extraction/" func (h *HuggingFaceEmbedder) doRequest(ctx context.Context, jsonBody []byte, model string) ([]byte, error) { - req, err := http.NewRequestWithContext(ctx, http.MethodPost, APIBaseURL+model, bytes.NewBuffer(jsonBody)) if err != nil { return nil, err diff --git a/embedder/llamacpp/llamacpp.go b/embedder/llamacpp/llamacpp.go index 747dffd1..c7d7868a 100644 --- a/embedder/llamacpp/llamacpp.go +++ b/embedder/llamacpp/llamacpp.go @@ -56,7 +56,6 @@ func (l *LlamaCppEmbedder) Embed(ctx context.Context, texts []string) ([]embedde } func (l *LlamaCppEmbedder) embed(ctx context.Context, text string) (embedder.Embedding, error) { - _, err := os.Stat(l.llamacppPath) if err != nil { return nil, err @@ -72,11 +71,9 @@ func (l *LlamaCppEmbedder) embed(ctx context.Context, text string) (embedder.Emb } return parseEmbeddings(string(out)) - } func parseEmbeddings(str string) (embedder.Embedding, error) { - strSlice := strings.Split(strings.TrimSpace(str), " ") floatSlice := make([]float64, len(strSlice)) for i, s := range strSlice { diff --git a/embedder/openai/math.go b/embedder/openai/math.go index f5e3a888..12d60a15 100644 --- a/embedder/openai/math.go +++ b/embedder/openai/math.go @@ -7,7 +7,6 @@ import ( ) func normalizeEmbeddings(embeddings []embedder.Embedding, lens []float64) embedder.Embedding { - chunkAvgEmbeddings := average(embeddings, lens) norm := norm(chunkAvgEmbeddings) diff --git a/embedder/openai/openai.go b/embedder/openai/openai.go index 431f6b5c..1cff94ce 100644 --- a/embedder/openai/openai.go +++ b/embedder/openai/openai.go @@ -94,7 +94,6 @@ func (o *OpenAIEmbedder) concurrentEmbed( texts []string, maxTokens int, ) ([]embedder.Embedding, error) { - type indexedEmbeddings struct { index int embedding embedder.Embedding @@ -105,7 +104,6 @@ func (o *OpenAIEmbedder) concurrentEmbed( embeddingsChan := make(chan indexedEmbeddings, len(texts)) for i, text := range texts { - go func(ctx context.Context, i int, text string, maxTokens int) { embedding, err := o.safeEmbed(ctx, text, maxTokens) @@ -114,9 +112,7 @@ func (o *OpenAIEmbedder) concurrentEmbed( embedding: embedding, err: err, } - }(ctx, i, text, maxTokens) - } var err error @@ -146,7 +142,6 @@ func (o *OpenAIEmbedder) concurrentEmbed( } func (o *OpenAIEmbedder) safeEmbed(ctx context.Context, text string, maxTokens int) (embedder.Embedding, error) { - sanitizedText := text if strings.HasSuffix(o.model.String(), "001") { sanitizedText = strings.ReplaceAll(text, "\n", " ") @@ -163,11 +158,9 @@ func (o *OpenAIEmbedder) safeEmbed(ctx context.Context, text string, maxTokens i } return normalizeEmbeddings(embeddingsForChunks, chunkLens), nil - } func (o *OpenAIEmbedder) chunkText(text string, maxTokens int) ([]string, error) { - tokens, err := o.textToTokens(text) if err != nil { return nil, err @@ -195,7 +188,6 @@ func (o *OpenAIEmbedder) getEmebeddingsForChunks( ctx context.Context, chunks []string, ) ([]embedder.Embedding, []float64, error) { - chunkLens := []float64{} embeddingsForChunks, err := o.openAICreateEmebeddings(ctx, chunks) @@ -208,11 +200,9 @@ func (o *OpenAIEmbedder) getEmebeddingsForChunks( } return embeddingsForChunks, chunkLens, nil - } func (o *OpenAIEmbedder) openAICreateEmebeddings(ctx context.Context, texts []string) ([]embedder.Embedding, error) { - resp, err := o.openAIClient.CreateEmbeddings( ctx, openai.EmbeddingRequest{ diff --git a/embedder/openai/openai_test.go b/embedder/openai/openai_test.go index 7aadd37b..fde710fd 100644 --- a/embedder/openai/openai_test.go +++ b/embedder/openai/openai_test.go @@ -8,7 +8,6 @@ import ( ) func Test_openAIEmbedder_splitText(t *testing.T) { - type fields struct { openAIClient *openai.Client model Model diff --git a/embedder/openai/token.go b/embedder/openai/token.go index 242c3898..0b00b39f 100644 --- a/embedder/openai/token.go +++ b/embedder/openai/token.go @@ -14,7 +14,6 @@ func (o *OpenAIEmbedder) textToTokens(text string) ([]int, error) { } func (o *OpenAIEmbedder) getMaxTokens() int { - if tiktoken.MODEL_TO_ENCODING[o.model.String()] == "cl100k_base" { return 8191 } diff --git a/index/pinecone/pinecone.go b/index/pinecone/pinecone.go index e0754bf4..946fb368 100644 --- a/index/pinecone/pinecone.go +++ b/index/pinecone/pinecone.go @@ -49,7 +49,6 @@ type Options struct { } func New(options Options, embedder index.Embedder) *Index { - apiKey := os.Getenv("PINECONE_API_KEY") environment := os.Getenv("PINECONE_ENVIRONMENT") @@ -77,7 +76,6 @@ func (p *Index) WithAPIKeyAndEnvironment(apiKey, environment string) *Index { } func (p *Index) LoadFromDocuments(ctx context.Context, documents []document.Document) error { - err := p.createIndexIfRequired(ctx) if err != nil { return fmt.Errorf("%s: %w", index.ErrInternal, err) @@ -91,7 +89,6 @@ func (p *Index) LoadFromDocuments(ctx context.Context, documents []document.Docu } func (p *Index) IsEmpty(ctx context.Context) (bool, error) { - err := p.createIndexIfRequired(ctx) if err != nil { return true, fmt.Errorf("%s: %w", index.ErrInternal, err) @@ -123,7 +120,6 @@ func (p *Index) IsEmpty(ctx context.Context) (bool, error) { } return *namespace.VectorCount == 0, nil - } func (p *Index) Add(ctx context.Context, item *index.Data) error { @@ -152,7 +148,6 @@ func (p *Index) Add(ctx context.Context, item *index.Data) error { } func (p *Index) Search(ctx context.Context, values []float64, opts ...option.Option) (index.SearchResults, error) { - pineconeOptions := &option.Options{ TopK: defaultTopK, } @@ -176,7 +171,6 @@ func (p *Index) Search(ctx context.Context, values []float64, opts ...option.Opt } func (p *Index) Query(ctx context.Context, query string, opts ...option.Option) (index.SearchResults, error) { - pineconeOptions := &option.Options{ TopK: defaultTopK, } @@ -200,7 +194,6 @@ func (p *Index) Query(ctx context.Context, query string, opts ...option.Option) } func (p *Index) query(ctx context.Context, query string, opts *option.Options) ([]pineconeresponse.QueryMatch, error) { - embeddings, err := p.embedder.Embed(ctx, []string{query}) if err != nil { return nil, err @@ -242,7 +235,6 @@ func (p *Index) similaritySearch( } func (p *Index) getProjectID(ctx context.Context) error { - if p.projectID != nil { return nil } @@ -260,7 +252,6 @@ func (p *Index) getProjectID(ctx context.Context) error { } func (p *Index) createIndexIfRequired(ctx context.Context) error { - if p.createIndex == nil { return nil } @@ -311,13 +302,10 @@ func (p *Index) createIndexIfRequired(ctx context.Context) error { time.Sleep(1 * time.Second) } } - } func (p *Index) batchUpsert(ctx context.Context, documents []document.Document) error { - for i := 0; i < len(documents); i += p.batchUpsertSize { - batchEnd := i + p.batchUpsertSize if batchEnd > len(documents) { batchEnd = len(documents) @@ -348,7 +336,6 @@ func (p *Index) batchUpsert(ctx context.Context, documents []document.Document) } func (p *Index) vectorUpsert(ctx context.Context, vectors []pineconerequest.Vector) error { - err := p.getProjectID(ctx) if err != nil { return fmt.Errorf("%s: %w", index.ErrInternal, err) @@ -380,11 +367,9 @@ func buildPineconeVectorsFromEmbeddingsAndDocuments( startIndex int, includeContent bool, ) ([]pineconerequest.Vector, error) { - var vectors []pineconerequest.Vector for i, embedding := range embeddings { - metadata := index.DeepCopyMetadata(documents[startIndex+i].Metadata) // inject document content into vector metadata @@ -417,7 +402,6 @@ func buildSearchResultsFromPineconeMatches( searchResults := make([]index.SearchResult, len(matches)) for i, match := range matches { - metadata := index.DeepCopyMetadata(match.Metadata) if !includeContent { delete(metadata, index.DefaultKeyContent) diff --git a/index/qdrant/qdrant.go b/index/qdrant/qdrant.go index b9244f62..e9c0a87c 100644 --- a/index/qdrant/qdrant.go +++ b/index/qdrant/qdrant.go @@ -52,7 +52,6 @@ type Options struct { } func New(options Options, embedder index.Embedder) *Index { - apiKey := os.Getenv("QDRANT_API_KEY") endpoint := os.Getenv("QDRANT_ENDPOINT") @@ -79,7 +78,6 @@ func (q *Index) WithAPIKeyAndEdpoint(apiKey, endpoint string) *Index { } func (q *Index) LoadFromDocuments(ctx context.Context, documents []document.Document) error { - err := q.createCollectionIfRequired(ctx) if err != nil { return fmt.Errorf("%s: %w", index.ErrInternal, err) @@ -93,7 +91,6 @@ func (q *Index) LoadFromDocuments(ctx context.Context, documents []document.Docu } func (q *Index) IsEmpty(ctx context.Context) (bool, error) { - err := q.createCollectionIfRequired(ctx) if err != nil { return true, fmt.Errorf("%s: %w", index.ErrInternal, err) @@ -112,7 +109,6 @@ func (q *Index) IsEmpty(ctx context.Context) (bool, error) { } return res.Result.VectorsCount == 0, nil - } func (q *Index) Add(ctx context.Context, item *index.Data) error { @@ -160,7 +156,6 @@ func (q *Index) Search(ctx context.Context, values []float64, opts ...option.Opt } func (q *Index) Query(ctx context.Context, query string, opts ...option.Option) (index.SearchResults, error) { - qdrantOptions := &option.Options{ TopK: defaultTopK, } @@ -184,7 +179,6 @@ func (q *Index) similaritySearch( values []float64, opts *option.Options, ) ([]qdrantresponse.PointSearchResult, error) { - if opts.Filter == nil { opts.Filter = qdrantrequest.Filter{} } @@ -223,7 +217,6 @@ func (q *Index) query( } func (q *Index) createCollectionIfRequired(ctx context.Context) error { - if q.createCollection == nil { return nil } @@ -258,9 +251,7 @@ func (q *Index) createCollectionIfRequired(ctx context.Context) error { } func (q *Index) batchUpsert(ctx context.Context, documents []document.Document) error { - for i := 0; i < len(documents); i += q.batchUpsertSize { - batchEnd := i + q.batchUpsertSize if batchEnd > len(documents) { batchEnd = len(documents) @@ -291,7 +282,6 @@ func (q *Index) batchUpsert(ctx context.Context, documents []document.Document) } func (q *Index) pointUpsert(ctx context.Context, points []qdrantrequest.Point) error { - wait := true req := &qdrantrequest.PointUpsert{ Wait: &wait, @@ -314,11 +304,9 @@ func buildQdrantPointsFromEmbeddingsAndDocuments( startIndex int, includeContent bool, ) ([]qdrantrequest.Point, error) { - var vectors []qdrantrequest.Point for i, embedding := range embeddings { - metadata := index.DeepCopyMetadata(documents[startIndex+i].Metadata) // inject document content into vector metadata @@ -351,7 +339,6 @@ func buildSearchResultsFromQdrantMatches( searchResults := make([]index.SearchResult, len(matches)) for i, match := range matches { - metadata := index.DeepCopyMetadata(match.Payload) if !includeContent { delete(metadata, index.DefaultKeyContent) diff --git a/index/simpleVectorIndex/simpleVectorIndex.go b/index/simpleVectorIndex/simpleVectorIndex.go index 749830c2..e2beab36 100644 --- a/index/simpleVectorIndex/simpleVectorIndex.go +++ b/index/simpleVectorIndex/simpleVectorIndex.go @@ -54,7 +54,6 @@ func (s *Index) LoadFromDocuments(ctx context.Context, documents []document.Docu } for i := 0; i < len(documents); i += defaultBatchSize { - end := i + defaultBatchSize if end > len(documents) { end = len(documents) @@ -77,7 +76,6 @@ func (s *Index) LoadFromDocuments(ctx context.Context, documents []document.Docu } s.data = append(s.data, buildDataFromEmbeddingAndDocument(id.String(), embeddings[j], document)) } - } err = s.save() @@ -103,7 +101,6 @@ func buildDataFromEmbeddingAndDocument( } func (s Index) save() error { - jsonContent, err := json.Marshal(s.data) if err != nil { return err @@ -134,7 +131,6 @@ func (s *Index) database() string { } func (s *Index) IsEmpty() (bool, error) { - err := s.load() if err != nil { return true, fmt.Errorf("%s: %w", index.ErrInternal, err) @@ -188,7 +184,6 @@ func (s *Index) Search(ctx context.Context, values []float64, opts ...option.Opt } func (s *Index) Query(ctx context.Context, query string, opts ...option.Option) (index.SearchResults, error) { - sviOptions := &option.Options{ TopK: defaultTopK, } diff --git a/llm/cache/cache.go b/llm/cache/cache.go index bee8c598..6776ff34 100644 --- a/llm/cache/cache.go +++ b/llm/cache/cache.go @@ -59,7 +59,6 @@ func (c *Cache) WithScoreThreshold(scoreThreshold float64) *Cache { } func (c *Cache) Get(ctx context.Context, query string) (*Result, error) { - embedding, err := c.embedder.Embed(ctx, []string{query}) if err != nil { return nil, err diff --git a/llm/cohere/cohere.go b/llm/cohere/cohere.go index 4f326836..ae9c9a22 100644 --- a/llm/cohere/cohere.go +++ b/llm/cohere/cohere.go @@ -84,7 +84,6 @@ func (c *Cohere) WithStop(stop []string) *Cohere { // Completion returns the completion for the given prompt func (c *Cohere) Completion(ctx context.Context, prompt string) (string, error) { - resp := &response.Generate{} err := c.client.Generate( ctx, diff --git a/llm/huggingface/conversational.go b/llm/huggingface/conversational.go index e439af44..48a34d1e 100644 --- a/llm/huggingface/conversational.go +++ b/llm/huggingface/conversational.go @@ -38,7 +38,6 @@ type conversation struct { } func (h *HuggingFace) conversationalCompletion(ctx context.Context, prompt string) (string, error) { - isTrue := true request := conversationalRequest{ Inputs: conversationalInputs{ diff --git a/llm/huggingface/http.go b/llm/huggingface/http.go index 2f3c0125..00c229ac 100644 --- a/llm/huggingface/http.go +++ b/llm/huggingface/http.go @@ -10,7 +10,6 @@ import ( ) func (h *HuggingFace) doRequest(ctx context.Context, jsonBody []byte, model string) ([]byte, error) { - req, err := http.NewRequestWithContext(ctx, http.MethodPost, APIBaseURL+model, bytes.NewBuffer(jsonBody)) if err != nil { return nil, err diff --git a/llm/huggingface/huggingface.go b/llm/huggingface/huggingface.go index e9d7f7e2..43b68b8d 100644 --- a/llm/huggingface/huggingface.go +++ b/llm/huggingface/huggingface.go @@ -106,7 +106,6 @@ func (h *HuggingFace) WithHTTPClient(httpClient *http.Client) *HuggingFace { // Completion returns the completion for the given prompt func (h *HuggingFace) Completion(ctx context.Context, prompt string) (string, error) { - var output string var outputs []string var err error @@ -131,7 +130,6 @@ func (h *HuggingFace) Completion(ctx context.Context, prompt string) (string, er // BatchCompletion returns the completion for the given prompts func (h *HuggingFace) BatchCompletion(ctx context.Context, prompts []string) ([]string, error) { - var outputs []string var err error switch h.mode { diff --git a/llm/huggingface/textgeneration.go b/llm/huggingface/textgeneration.go index 82c79099..26f42f53 100644 --- a/llm/huggingface/textgeneration.go +++ b/llm/huggingface/textgeneration.go @@ -33,7 +33,6 @@ func (tgs textGenerationResponseSequence) String() string { } func (h *HuggingFace) textgenerationCompletion(ctx context.Context, prompts []string) ([]string, error) { - numSequences := 1 isTrue := true diff --git a/llm/llamacpp/llamacpp.go b/llm/llamacpp/llamacpp.go index 96ae0bb1..0ccbd21d 100644 --- a/llm/llamacpp/llamacpp.go +++ b/llm/llamacpp/llamacpp.go @@ -67,7 +67,6 @@ func (l *Llamacpp) WithArgs(llamacppArgs []string) *Llamacpp { } func (l *Llamacpp) Completion(ctx context.Context, prompt string) (string, error) { - _, err := os.Stat(l.llamacppPath) if err != nil { return "", err diff --git a/llm/mock/mock.go b/llm/mock/mock.go index 21106d18..a9eb9aaa 100644 --- a/llm/mock/mock.go +++ b/llm/mock/mock.go @@ -1,4 +1,3 @@ -// nolint // Package llmmock provides a mock implementation of the LLM interface. package llmmock @@ -16,9 +15,11 @@ type LlmMock struct { } func (l *LlmMock) Completion(ctx context.Context, prompt string) (string, error) { + _ = ctx fmt.Printf("User: %s\n", prompt) rand.Seed(time.Now().UnixNano()) + //nolint:gosec number := rand.Intn(3) + 3 randomStrings := getRandomStrings(number) @@ -33,7 +34,7 @@ func (l *LlmMock) Completion(ctx context.Context, prompt string) (string, error) } func (l *LlmMock) Chat(ctx context.Context, prompt *chat.Chat) (string, error) { - + _ = ctx messages, err := prompt.ToMessages() if err != nil { return "", err @@ -50,6 +51,7 @@ func (l *LlmMock) Chat(ctx context.Context, prompt *chat.Chat) (string, error) { } rand.Seed(time.Now().UnixNano()) + //nolint:gosec number := rand.Intn(3) + 3 randomStrings := getRandomStrings(number) @@ -66,9 +68,12 @@ func (l *LlmMock) Chat(ctx context.Context, prompt *chat.Chat) (string, error) { type JSONLllMock struct{} func (l *JSONLllMock) Completion(ctx context.Context, prompt string) (string, error) { + _ = ctx fmt.Printf("User: %s\n", prompt) + //nolint:gosec rand.Seed(time.Now().UnixNano()) + //nolint:gosec output := `{"first": "` + strings.Join(getRandomStrings(rand.Intn(5)+1), " ") + `", "second": "` + strings.Join(getRandomStrings(rand.Intn(5)+1), " ") + `"}` @@ -83,7 +88,6 @@ func (l *JSONLllMock) Completion(ctx context.Context, prompt string) (string, er // getRandomStrings returns a random selection of strings from the data slice. // this function has been generate by AI! ;) func getRandomStrings(number int) []string { - data := []string{"air", "fly", "ball", "kite", "tree", "grass", "house", "ocean", "river", "lake", "road", "bridge", "mountain", "valley", "desert", "flower", "wind", "book", "table", "chair", "television", "computer", "window", "door", "cup", "plate", "spoon", "fork", "knife", "bottle", "glass"} @@ -93,6 +97,7 @@ func getRandomStrings(number int) []string { result := []string{} for i := 0; i < number; i++ { + //nolint:gosec result = append(result, data[rand.Intn(len(data))]) } @@ -100,7 +105,7 @@ func getRandomStrings(number int) []string { } func (l *JSONLllMock) Chat(ctx context.Context, prompt *chat.Chat) (string, error) { - + _ = ctx messages, err := prompt.ToMessages() if err != nil { return "", err @@ -118,6 +123,7 @@ func (l *JSONLllMock) Chat(ctx context.Context, prompt *chat.Chat) (string, erro rand.Seed(time.Now().UnixNano()) + //nolint:gosec output := `{"first": "` + strings.Join(getRandomStrings(rand.Intn(5)+1), " ") + `", "second": "` + strings.Join(getRandomStrings(rand.Intn(5)+1), " ") + `"}` diff --git a/llm/openai/function.go b/llm/openai/function.go index 5817318a..4e5b8109 100644 --- a/llm/openai/function.go +++ b/llm/openai/function.go @@ -50,7 +50,6 @@ func (o *OpenAI) BindFunction( } func (o *OpenAI) getFunctions() []openai.FunctionDefinition { - functions := []openai.FunctionDefinition{} for _, function := range o.functions { diff --git a/llm/openai/openai.go b/llm/openai/openai.go index ad0704f2..19147d4b 100644 --- a/llm/openai/openai.go +++ b/llm/openai/openai.go @@ -76,7 +76,6 @@ type OpenAI struct { } func New(model Model, temperature float32, maxTokens int, verbose bool) *OpenAI { - openAIKey := os.Getenv("OPENAI_API_KEY") return &OpenAI{ @@ -197,7 +196,6 @@ func (o *OpenAI) Completion(ctx context.Context, prompt string) (string, error) // BatchCompletion returns multiple completions for the given prompts. func (o *OpenAI) BatchCompletion(ctx context.Context, prompts []string) ([]string, error) { - response, err := o.openAIClient.CreateCompletion( ctx, openai.CompletionRequest{ @@ -242,7 +240,6 @@ func (o *OpenAI) CompletionStream(ctx context.Context, callbackFn StreamCallback // BatchCompletionStream returns multiple completion streams for the given prompts. func (o *OpenAI) BatchCompletionStream(ctx context.Context, callbackFn []StreamCallback, prompts []string) error { - stream, err := o.openAIClient.CreateCompletionStream( ctx, openai.CompletionRequest{ @@ -262,7 +259,6 @@ func (o *OpenAI) BatchCompletionStream(ctx context.Context, callbackFn []StreamC defer stream.Close() for { - response, err := stream.Recv() if errors.Is(err, io.EOF) { break @@ -289,7 +285,6 @@ func (o *OpenAI) BatchCompletionStream(ctx context.Context, callbackFn []StreamC callbackFn[index](output) } - } return nil @@ -297,7 +292,6 @@ func (o *OpenAI) BatchCompletionStream(ctx context.Context, callbackFn []StreamC // Chat returns a single chat completion for the given prompt. func (o *OpenAI) Chat(ctx context.Context, prompt *chat.Chat) (string, error) { - messages, err := buildMessages(prompt) if err != nil { return "", fmt.Errorf("%s: %w", ErrOpenAIChat, err) @@ -359,7 +353,6 @@ func (o *OpenAI) Chat(ctx context.Context, prompt *chat.Chat) (string, error) { // ChatStream returns a single chat stream for the given prompt. func (o *OpenAI) ChatStream(ctx context.Context, callbackFn StreamCallback, prompt *chat.Chat) error { - messages, err := buildMessages(prompt) if err != nil { return fmt.Errorf("%s: %w", ErrOpenAIChat, err) @@ -382,7 +375,6 @@ func (o *OpenAI) ChatStream(ctx context.Context, callbackFn StreamCallback, prom } for { - response, err := stream.Recv() if errors.Is(err, io.EOF) { break @@ -415,7 +407,6 @@ func (o *OpenAI) SetStop(stop []string) { } func (o *OpenAI) setUsageMetadata(usage openai.Usage) { - callbackMetadata := make(types.Meta) err := mapstructure.Decode(usage, &callbackMetadata) @@ -427,7 +418,6 @@ func (o *OpenAI) setUsageMetadata(usage openai.Usage) { } func buildMessages(prompt *chat.Chat) ([]openai.ChatCompletionMessage, error) { - var messages []openai.ChatCompletionMessage promptMessages, err := prompt.ToMessages() @@ -452,7 +442,6 @@ func buildMessages(prompt *chat.Chat) ([]openai.ChatCompletionMessage, error) { Content: message.Content, }) } else if message.Type == chat.MessageTypeFunction { - fnmessage := openai.ChatCompletionMessage{ Role: openai.ChatMessageRoleFunction, Content: message.Content, @@ -469,7 +458,6 @@ func buildMessages(prompt *chat.Chat) ([]openai.ChatCompletionMessage, error) { } func debugChat(prompt *chat.Chat, content string) { - promptMessages, err := prompt.ToMessages() if err != nil { return diff --git a/loader/csv.go b/loader/csv.go index 2f0dfa7a..24f0d004 100644 --- a/loader/csv.go +++ b/loader/csv.go @@ -58,7 +58,6 @@ func (c *CSVLoader) Load(ctx context.Context) ([]document.Document, error) { } func (c *CSVLoader) validate() error { - fileStat, err := os.Stat(c.filename) if err != nil { return fmt.Errorf("%s: %w", ErrorInternal, err) @@ -95,7 +94,6 @@ func (c *CSVLoader) readCSV() ([]document.Document, error) { } if titles == nil { - titles = make([]string, len(record)) for i, r := range record { titles[i] = strings.ReplaceAll(r, "\"", "") diff --git a/loader/directory.go b/loader/directory.go index c4a70561..0e9f024a 100644 --- a/loader/directory.go +++ b/loader/directory.go @@ -19,12 +19,10 @@ type DirectoryLoader struct { } func NewDirectoryLoader(dirname string, regExPathMatch string) *DirectoryLoader { - return &DirectoryLoader{ dirname: dirname, regExPathMatch: regExPathMatch, } - } func (d *DirectoryLoader) WithTextSplitter(textSplitter TextSplitter) *DirectoryLoader { @@ -33,7 +31,6 @@ func (d *DirectoryLoader) WithTextSplitter(textSplitter TextSplitter) *Directory } func (d *DirectoryLoader) Load(ctx context.Context) ([]document.Document, error) { - err := d.validate() if err != nil { return nil, err @@ -48,7 +45,6 @@ func (d *DirectoryLoader) Load(ctx context.Context) ([]document.Document, error) err = filepath.Walk(d.dirname, func(path string, info os.FileInfo, err error) error { if err == nil && regExp.MatchString(info.Name()) { - d, err := NewTextLoader(path, nil).Load(ctx) if err != nil { return err @@ -70,7 +66,6 @@ func (d *DirectoryLoader) Load(ctx context.Context) ([]document.Document, error) } func (d *DirectoryLoader) validate() error { - fileStat, err := os.Stat(d.dirname) if err != nil { return fmt.Errorf("%s: %w", ErrorInternal, err) diff --git a/loader/hf_image_to_text.go b/loader/hf_image_to_text.go index d2df8723..ff6bea3a 100644 --- a/loader/hf_image_to_text.go +++ b/loader/hf_image_to_text.go @@ -56,7 +56,6 @@ func (h *HFImageToText) WithTextSplitter(textSplitter TextSplitter) *HFImageToTe } func (h *HFImageToText) Load(ctx context.Context) ([]document.Document, error) { - err := isFile(h.mediaFile) if err != nil { return nil, fmt.Errorf("%s: %w", ErrorInternal, err) @@ -132,7 +131,6 @@ func hfMediaHTTPCall(ctx context.Context, token, model, mediaFile string) ([]byt } func hfCheckHTTPResponse(respJSON []byte) error { - type apiError struct { Error string `json:"error,omitempty"` } diff --git a/loader/hf_speech_recognition.go b/loader/hf_speech_recognition.go index 0b97d7da..23505923 100644 --- a/loader/hf_speech_recognition.go +++ b/loader/hf_speech_recognition.go @@ -51,7 +51,6 @@ func (h *HFSpeechRecognition) WithTextSplitter(textSplitter TextSplitter) *HFSpe } func (h *HFSpeechRecognition) Load(ctx context.Context) ([]document.Document, error) { - err := isFile(h.mediaFile) if err != nil { return nil, fmt.Errorf("%s: %w", ErrorInternal, err) diff --git a/loader/libreoffice.go b/loader/libreoffice.go index 066207f8..426305bb 100644 --- a/loader/libreoffice.go +++ b/loader/libreoffice.go @@ -46,7 +46,6 @@ func (l *LibreOfficeLoader) WithArgs(libreOfficeArgs []string) *LibreOfficeLoade } func (l *LibreOfficeLoader) Load(ctx context.Context) ([]document.Document, error) { - err := isFile(l.libreOfficePath) if err != nil { return nil, ErrLibreOfficeNotFound @@ -70,7 +69,6 @@ func (l *LibreOfficeLoader) Load(ctx context.Context) ([]document.Document, erro } func (l *LibreOfficeLoader) loadFile(ctx context.Context) ([]document.Document, error) { - libreOfficeArgs := append(l.libreOfficeArgs, l.filename) //nolint:gosec diff --git a/loader/loader.go b/loader/loader.go index b52a8c71..5c647244 100644 --- a/loader/loader.go +++ b/loader/loader.go @@ -24,7 +24,6 @@ type Loader struct { } func isFile(filename string) error { - fileStat, err := os.Stat(filename) if err != nil { return fmt.Errorf("%s: %w", ErrorInternal, err) diff --git a/loader/pdf_to_text.go b/loader/pdf_to_text.go index ee0d7851..21f8e921 100644 --- a/loader/pdf_to_text.go +++ b/loader/pdf_to_text.go @@ -42,7 +42,6 @@ func (p *PDFLoader) WithTextSplitter(textSplitter TextSplitter) *PDFLoader { } func (p *PDFLoader) Load(ctx context.Context) ([]document.Document, error) { - _, err := os.Stat(p.pdfToTextPath) if err != nil { return nil, ErrPdfToTextNotFound @@ -93,7 +92,6 @@ func (p *PDFLoader) loadDir(ctx context.Context) ([]document.Document, error) { err := filepath.Walk(p.path, func(path string, info os.FileInfo, err error) error { if err == nil && strings.HasSuffix(info.Name(), ".pdf") { - d, err := NewPDFToTextLoader(path).WithPDFToTextPath(p.pdfToTextPath).loadFile(ctx) if err != nil { return err diff --git a/loader/pubmed.go b/loader/pubmed.go index a8c925ec..f3ad6cd0 100644 --- a/loader/pubmed.go +++ b/loader/pubmed.go @@ -39,11 +39,9 @@ func (p *PubMedLoader) WithTextSplitter(textSplitter TextSplitter) *PubMedLoader } func (p *PubMedLoader) Load(ctx context.Context) ([]document.Document, error) { - documens := make([]document.Document, len(p.pubMedIDs)) for i, pubMedID := range p.pubMedIDs { - doc, err := p.load(ctx, pubMedID) if err != nil { return nil, err @@ -60,7 +58,6 @@ func (p *PubMedLoader) Load(ctx context.Context) ([]document.Document, error) { } func (p *PubMedLoader) load(ctx context.Context, pubMedID string) (*document.Document, error) { - url := fmt.Sprintf(pubMedBioCURLFormat, pubMedID) req, err := http.NewRequest("GET", url, nil) diff --git a/loader/tesseract.go b/loader/tesseract.go index 97f7d2b0..382a34a4 100644 --- a/loader/tesseract.go +++ b/loader/tesseract.go @@ -46,7 +46,6 @@ func (l *TesseractLoader) WithArgs(tesseractArgs []string) *TesseractLoader { } func (l *TesseractLoader) Load(ctx context.Context) ([]document.Document, error) { - err := isFile(l.tesseractPath) if err != nil { return nil, ErrTesseractNotFound @@ -70,7 +69,6 @@ func (l *TesseractLoader) Load(ctx context.Context) ([]document.Document, error) } func (l *TesseractLoader) loadFile(ctx context.Context) ([]document.Document, error) { - tesseractArgs := []string{l.filename, "stdout"} tesseractArgs = append(tesseractArgs, l.tesseractArgs...) diff --git a/loader/whisper.go b/loader/whisper.go index e00ba753..0edf2dff 100644 --- a/loader/whisper.go +++ b/loader/whisper.go @@ -18,7 +18,6 @@ type WhisperLoader struct { } func NewWhisperLoader(filename string) *WhisperLoader { - openAIApiKey := os.Getenv("OPENAI_API_KEY") return &WhisperLoader{ @@ -33,7 +32,6 @@ func (w *WhisperLoader) WithClient(client *openai.Client) *WhisperLoader { } func (w *WhisperLoader) Load(ctx context.Context) ([]document.Document, error) { - err := isFile(w.filename) if err != nil { return nil, err diff --git a/loader/whispercpp.go b/loader/whispercpp.go index 22ae0e9e..f442d01f 100644 --- a/loader/whispercpp.go +++ b/loader/whispercpp.go @@ -61,7 +61,6 @@ func (w *WhisperCppLoader) WithArgs(whisperCppArgs []string) *WhisperCppLoader { } func (w *WhisperCppLoader) Load(ctx context.Context) ([]document.Document, error) { - err := isFile(w.ffmpegPath) if err != nil { return nil, err @@ -99,7 +98,6 @@ func (w *WhisperCppLoader) Load(ctx context.Context) ([]document.Document, error } func (w *WhisperCppLoader) convertAndTrascribe(ctx context.Context) (string, error) { - ffmpegArgs := []string{"-i", w.filename} ffmpegArgs = append(ffmpegArgs, w.ffmpegArgs...) //nolint:gosec diff --git a/loader/youtube-dl.go b/loader/youtube-dl.go index a915a60c..e3a2ee9e 100644 --- a/loader/youtube-dl.go +++ b/loader/youtube-dl.go @@ -59,7 +59,6 @@ func (y *YoutubeDLLoader) WithAutoSubtitlesMode() *YoutubeDLLoader { } func (y *YoutubeDLLoader) Load(ctx context.Context) ([]document.Document, error) { - err := isFile(y.youtubeDlPath) if err != nil { return nil, ErrYoutubeDLNotFound @@ -78,7 +77,6 @@ func (y *YoutubeDLLoader) Load(ctx context.Context) ([]document.Document, error) } func (y *YoutubeDLLoader) loadVideo(ctx context.Context) ([]document.Document, error) { - tempDir, err := os.MkdirTemp("", "youtube-dl") if err != nil { return nil, err @@ -119,7 +117,6 @@ func (y *YoutubeDLLoader) loadVideo(ctx context.Context) ([]document.Document, e } func convertVTTtoPlainText(filename string) (string, error) { - file, err := os.Open(filename) if err != nil { return "", err @@ -134,7 +131,6 @@ func convertVTTtoPlainText(filename string) (string, error) { var plainText string for _, line := range lines { - timestampRegex := regexp.MustCompile(`\d{2}:\d{2}:\d{2}\.\d{3} --> \d{2}:\d{2}:\d{2}\.\d{3}`) line = timestampRegex.ReplaceAllString(line, "") diff --git a/pipeline/pipeline.go b/pipeline/pipeline.go index b12581f6..2a4eec7e 100644 --- a/pipeline/pipeline.go +++ b/pipeline/pipeline.go @@ -44,7 +44,6 @@ type Pipeline struct { } func New(pipes ...Pipe) *Pipeline { - pipesMap := make(map[int]Pipe) for i, pipe := range pipes { pipesMap[i] = pipe @@ -56,7 +55,6 @@ func New(pipes ...Pipe) *Pipeline { } func (p *Pipeline) WithPreCallbacks(callbacks ...Callback) *Pipeline { - p.preCallbacks = make(map[int]Callback) for i, callback := range callbacks { p.preCallbacks[i] = callback @@ -66,7 +64,6 @@ func (p *Pipeline) WithPreCallbacks(callbacks ...Callback) *Pipeline { } func (p *Pipeline) WithPostCallbacks(callbacks ...Callback) *Pipeline { - p.postCallbacks = make(map[int]Callback) for i, callback := range callbacks { p.postCallbacks[i] = callback @@ -87,7 +84,6 @@ func (p Pipeline) Run(ctx context.Context, input types.M) (types.M, error) { output := input for { - if p.thereIsAValidPreCallbackForTube(currentTube) { output, err = p.preCallbacks[currentTube](ctx, output) if err != nil { @@ -114,7 +110,6 @@ func (p Pipeline) Run(ctx context.Context, input types.M) (types.M, error) { currentTube = *nextTube continue } - } currentTube++ @@ -122,7 +117,6 @@ func (p Pipeline) Run(ctx context.Context, input types.M) (types.M, error) { if currentTube == len(p.pipes) { break } - } return output, nil @@ -149,7 +143,6 @@ func (p *Pipeline) thereIsAValidPostCallbackForTube(currentTube int) bool { } func (p *Pipeline) getNextTube(output types.M) *int { - nextTube, ok := output[NextTubeKey] if !ok { return nil @@ -161,5 +154,4 @@ func (p *Pipeline) getNextTube(output types.M) *int { } return nil - } diff --git a/pipeline/qa/qa.go b/pipeline/qa/qa.go index f7cebdfe..7e6a7f18 100644 --- a/pipeline/qa/qa.go +++ b/pipeline/qa/qa.go @@ -33,7 +33,6 @@ type QAPipeline struct { } func New(llmEngine pipeline.LlmEngine) *QAPipeline { - systemPrompt := prompt.New(qaTubeSystemPromptTemplate) userPrompt := prompt.NewPromptTemplate(qaTubeUserPromptTemplate) @@ -95,7 +94,6 @@ func (q *QAPipeline) Query(ctx context.Context, query string, opts ...indexoptio } func (q *QAPipeline) Run(ctx context.Context, query string, documents []document.Document) (types.M, error) { - content := "" for _, document := range documents { content += document.Content + "\n" @@ -108,5 +106,4 @@ func (q *QAPipeline) Run(ctx context.Context, query string, documents []document "context": content, }, ) - } diff --git a/pipeline/splitter.go b/pipeline/splitter.go index 74fc061b..36d7aebb 100644 --- a/pipeline/splitter.go +++ b/pipeline/splitter.go @@ -44,7 +44,6 @@ func (s *Splitter) WithMemory(name string, memory Memory) *Splitter { } func (s *Splitter) Run(ctx context.Context, input types.M) (types.M, error) { - splittedInputs, err := s.splitterFn(input) if err != nil { return nil, fmt.Errorf("%s: %w", ErrSplitFunction, err) @@ -81,5 +80,4 @@ func (s *Splitter) Run(ctx context.Context, input types.M) (types.M, error) { wg.Wait() return types.M{types.DefaultOutputKey: pipeOutpus}, nil - } diff --git a/pipeline/sql/sql.go b/pipeline/sql/sql.go index 89f4babb..0e5c4fbd 100644 --- a/pipeline/sql/sql.go +++ b/pipeline/sql/sql.go @@ -59,7 +59,6 @@ func New( dataSourceType DataSourceType, dataSourceName string, ) (*pipeline.Pipeline, error) { - memory := types.M{} if !llmImplementsSetStop(llmEngine) { @@ -156,7 +155,6 @@ func New( WithPostCallbacks(postQueryCB, postRefineCBFn, postDescribeCB) return sqlPipeline, nil - } func llmImplementsSetStop(llmEngine pipeline.LlmEngine) bool { @@ -239,7 +237,6 @@ func postRefineCBFn(output types.M, db *sql.DB, sqlDDL string, memory types.M) ( } func getSQLResult(db *sql.DB, query string) (string, error) { - rows, err := db.Query(query) if err != nil { return "", err @@ -274,13 +271,11 @@ func getSQLResult(db *sql.DB, query string) (string, error) { row := "" for _, col := range values { - if row != "" { row += "|" + string(col) } else { row += string(col) } - } content += "\n" + row @@ -290,7 +285,6 @@ func getSQLResult(db *sql.DB, query string) (string, error) { } func openDatabase(dataSourceType DataSourceType, dataSourceName string) (*sql.DB, error) { - db, err := sql.Open(string(dataSourceType), dataSourceName) if err != nil { return nil, err @@ -305,11 +299,9 @@ func openDatabase(dataSourceType DataSourceType, dataSourceName string) (*sql.DB } func getDDL(db *sql.DB, dataSourceType DataSourceType, dataSourceName string) (string, error) { - if dataSourceType == DataSourceSqlite { return getSqliteSchema(db) } else if dataSourceType == DataSourceMySQL { - dataSourceNameParts := strings.Split(dataSourceName, "/") if len(dataSourceNameParts) < 1 { return "", fmt.Errorf("invalid mysql data source name %s", dataSourceName) @@ -320,11 +312,9 @@ func getDDL(db *sql.DB, dataSourceType DataSourceType, dataSourceName string) (s } else { return "", fmt.Errorf("unsupported datasource %s", dataSourceType) } - } func getPromptTemplate(dataSourceType DataSourceType) (string, error) { - if dataSourceType == DataSourceSqlite { return dataSourceTypePromptTemplate[DataSourceSqlite], nil } else if dataSourceType == DataSourceMySQL { @@ -332,5 +322,4 @@ func getPromptTemplate(dataSourceType DataSourceType) (string, error) { } else { return "", fmt.Errorf("unsupported database scheme %s", dataSourceType) } - } diff --git a/pipeline/sql/sqlite.go b/pipeline/sql/sqlite.go index 49cd03f4..0b1688cd 100644 --- a/pipeline/sql/sqlite.go +++ b/pipeline/sql/sqlite.go @@ -15,7 +15,6 @@ Pay attention to use only the column names you can see in the tables below. Be c Pay attention to use date('now') function to get the current date, if the question involves "today".` func getSqliteSchema(db *sql.DB) (string, error) { - var schema string // Retrieve table names @@ -129,5 +128,4 @@ func getSqliteSchema(db *sql.DB) (string, error) { } return schema, nil - } diff --git a/pipeline/summarize/summarize.go b/pipeline/summarize/summarize.go index 05cbd2cd..445f7f3c 100644 --- a/pipeline/summarize/summarize.go +++ b/pipeline/summarize/summarize.go @@ -15,7 +15,6 @@ type Loader interface { } func New(llmEngine pipeline.LlmEngine, loader Loader) *pipeline.Pipeline { - docs := []document.Document{} iterator := 0 remainigDocs := 0 @@ -80,5 +79,4 @@ func New(llmEngine pipeline.LlmEngine, loader Loader) *pipeline.Pipeline { WithPreCallbacks(preSummaryCB, preRefineCB).WithPostCallbacks(postSummaryCB, postRefineCB) return summarizePipeline - } diff --git a/pipeline/tube.go b/pipeline/tube.go index 9e694850..244272f6 100644 --- a/pipeline/tube.go +++ b/pipeline/tube.go @@ -50,7 +50,6 @@ func (t *Tube) WithDecoder(decoder Decoder) *Tube { // The prompt is formatted with the input and the output of the prompt is used as input for the LLM. // If the step has a memory, the output is stored in the memory. func (t *Tube) Run(ctx context.Context, input types.M) (types.M, error) { - if input == nil { input = types.M{} } @@ -82,7 +81,6 @@ func (t *Tube) Run(ctx context.Context, input types.M) (types.M, error) { } return decodedOutput, nil - } func (t *Tube) executeLLM(ctx context.Context, input types.M) (string, error) { @@ -124,7 +122,6 @@ func (t *Tube) executeLLMCompletion(ctx context.Context, input types.M) (string, } func (t *Tube) executeLLMChat(ctx context.Context, input types.M) (string, error) { - for _, promptMessage := range t.llm.Chat.PromptMessages() { err := promptMessage.Prompt.Format(input) if err != nil { diff --git a/prompt/template.go b/prompt/template.go index 10c97b36..e50e13b8 100644 --- a/prompt/template.go +++ b/prompt/template.go @@ -17,7 +17,6 @@ type Template struct { } func NewPromptTemplate(text string) *Template { - promptTemplate := &Template{ input: types.M{}, template: text, @@ -33,7 +32,6 @@ func (t *Template) WithInputs(inputs interface{}) *Template { // Format formats the prompt using the template engine and the provided inputs. func (t *Template) Format(input types.M) error { - err := t.initTemplateEngine() if err != nil { return ErrTemplateEngine @@ -67,7 +65,6 @@ func (t *Template) String() string { } func (t *Template) initTemplateEngine() error { - if t.templateEngine != nil { return nil } diff --git a/textsplitter/recursiveTextSplitter.go b/textsplitter/recursiveTextSplitter.go index 07d00211..7f2d199c 100644 --- a/textsplitter/recursiveTextSplitter.go +++ b/textsplitter/recursiveTextSplitter.go @@ -42,12 +42,10 @@ func (r *RecursiveCharacterTextSplitter) WithLengthFunction( // AI-translated from https://github.com/hwchase17/langchain/blob/master/langchain/text_splitter.py func (r *RecursiveCharacterTextSplitter) SplitDocuments(documents []document.Document) []document.Document { - docs := make([]document.Document, 0) for i, doc := range documents { for _, chunk := range r.SplitText(doc.Content) { - metadata := make(types.Meta) for k, v := range documents[i].Metadata { metadata[k] = v diff --git a/textsplitter/textsplitter.go b/textsplitter/textsplitter.go index 5765ce13..fc0e7fc9 100644 --- a/textsplitter/textsplitter.go +++ b/textsplitter/textsplitter.go @@ -14,7 +14,6 @@ type TextSplitter struct { } func (t *TextSplitter) mergeSplits(splits []string, separator string) []string { - docs := make([]string, 0) currentDoc := make([]string, 0) total := 0 diff --git a/transformer/cohere-rerank.go b/transformer/cohere-rerank.go index f8bd2f21..380e5be7 100644 --- a/transformer/cohere-rerank.go +++ b/transformer/cohere-rerank.go @@ -99,7 +99,6 @@ func (c *CohereRerank) rerankDocuments( documents []document.Document, results []model.RerankResult, ) []document.Document { - rerankedDocuments := make([]document.Document, 0) for _, result := range results { index := result.Index diff --git a/transformer/dall-e.go b/transformer/dall-e.go index 2274afa9..f3cc0774 100644 --- a/transformer/dall-e.go +++ b/transformer/dall-e.go @@ -102,7 +102,6 @@ func (d *DallE) transformToURL(ctx context.Context, input string) (any, error) { } func (d *DallE) transformToFile(ctx context.Context, input string) (any, error) { - imgData, err := d.transformToImage(ctx, input) if err != nil { return nil, err @@ -122,7 +121,6 @@ func (d *DallE) transformToFile(ctx context.Context, input string) (any, error) } func (d *DallE) transformToImage(ctx context.Context, input string) (any, error) { - reqBase64 := openai.ImageRequest{ Prompt: input, Size: string(d.imageSize), diff --git a/transformer/visual-question-answering.go b/transformer/visual-question-answering.go index 2a15abf5..7d942549 100644 --- a/transformer/visual-question-answering.go +++ b/transformer/visual-question-answering.go @@ -60,7 +60,6 @@ func (v *VisualQuestionAnswering) WithImage(mediaFile string) *VisualQuestionAns } func (v *VisualQuestionAnswering) Transform(ctx context.Context, input string, all bool) (any, error) { - respJSON, err := hfVisualQuestionAnsweringHTTPCall(ctx, v.token, v.model, v.mediaFile, input) if err != nil { return "", err @@ -80,7 +79,6 @@ func (v *VisualQuestionAnswering) Transform(ctx context.Context, input string, a } func hfVisualQuestionAnsweringHTTPCall(ctx context.Context, token, model, mediaFile, question string) ([]byte, error) { - var inputs VisualQuestionAnsweringRequest base64String, err := imageToBase64(mediaFile) @@ -128,7 +126,6 @@ func hfVisualQuestionAnsweringHTTPCall(ctx context.Context, token, model, mediaF } func hfCheckHTTPResponse(respJSON []byte) error { - type apiError struct { Error string `json:"error,omitempty"` } @@ -167,7 +164,6 @@ func hfCheckHTTPResponse(respJSON []byte) error { } func imageToBase64(mediaFile string) (string, error) { - img, err := os.Open(mediaFile) if err != nil { return "", err diff --git a/types/types.go b/types/types.go index d053d39e..6f8c713e 100644 --- a/types/types.go +++ b/types/types.go @@ -8,7 +8,6 @@ type Meta map[string]interface{} // String returns the metadata as a JSON string func (m Meta) String() string { - jsonData, err := json.Marshal(m) if err != nil { return "" From 61bcb8ce8042c28d2fd4e503cd3cf7a26dc7385b Mon Sep 17 00:00:00 2001 From: Simone Vellei Date: Fri, 15 Sep 2023 01:19:45 +0200 Subject: [PATCH 3/9] chore: check errors in lint --- .golangci.yml | 146 ++++++++++++++++++- embedder/openai/openai.go | 6 +- index/index.go | 5 +- index/pinecone/pinecone.go | 6 +- index/qdrant/qdrant.go | 6 +- index/simpleVectorIndex/simpleVectorIndex.go | 18 +-- llm/openai/function.go | 8 +- llm/openai/openai.go | 12 +- loader/csv.go | 8 +- loader/directory.go | 6 +- loader/pdf_to_text.go | 6 +- loader/pubmed.go | 6 +- loader/youtube-dl.go | 4 +- pipeline/splitter.go | 4 +- pipeline/sql/mysql.go | 24 +-- pipeline/sql/sqlite.go | 24 +-- transformer/dall-e.go | 4 +- 17 files changed, 215 insertions(+), 78 deletions(-) diff --git a/.golangci.yml b/.golangci.yml index 1149ef52..d75a9362 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -15,11 +15,145 @@ run: deadline: 5m + +# This file contains only configs which differ from defaults. +# All possible options can be found here https://github.com/golangci/golangci-lint/blob/master/.golangci.reference.yml +linters-settings: + cyclop: + # The maximal code complexity to report. + # Default: 10 + max-complexity: 30 + # The maximal average package complexity. + # If it's higher than 0.0 (float) the check is enabled + # Default: 0.0 + package-average: 10.0 + + errcheck: + # Report about not checking of errors in type assertions: `a := b.(MyStruct)`. + # Such cases aren't reported by default. + # Default: false + check-type-assertions: true + + funlen: + # Checks the number of lines in a function. + # If lower than 0, disable the check. + # Default: 60 + lines: 100 + # Checks the number of statements in a function. + # If lower than 0, disable the check. + # Default: 40 + statements: 50 + + gocognit: + # Minimal code complexity to report + # Default: 30 (but we recommend 10-20) + min-complexity: 20 + + gocritic: + # Settings passed to gocritic. + # The settings key is the name of a supported gocritic checker. + # The list of supported checkers can be find in https://go-critic.github.io/overview. + settings: + captLocal: + # Whether to restrict checker to params only. + # Default: true + paramsOnly: false + underef: + # Whether to skip (*x).method() calls where x is a pointer receiver. + # Default: true + skipRecvDeref: false + + gomnd: + # List of function patterns to exclude from analysis. + # Values always ignored: `time.Date` + # Default: [] + ignored-functions: + - os.Chmod + - os.Mkdir + - os.MkdirAll + - os.OpenFile + - os.WriteFile + - prometheus.ExponentialBuckets + - prometheus.ExponentialBucketsRange + - prometheus.LinearBuckets + - strconv.FormatFloat + - strconv.FormatInt + - strconv.FormatUint + - strconv.ParseFloat + - strconv.ParseInt + - strconv.ParseUint + + gomodguard: + blocked: + # List of blocked modules. + # Default: [] + modules: + - github.com/golang/protobuf: + recommendations: + - google.golang.org/protobuf + reason: "see https://developers.google.com/protocol-buffers/docs/reference/go/faq#modules" + - github.com/satori/go.uuid: + recommendations: + - github.com/google/uuid + reason: "satori's package is not maintained" + - github.com/gofrs/uuid: + recommendations: + - github.com/google/uuid + reason: "see recommendation from dev-infra team: https://confluence.gtforge.com/x/gQI6Aw" + + govet: + # Enable all analyzers. + # Default: false + enable-all: true + # Disable analyzers by name. + # Run `go tool vet help` to see all analyzers. + # Default: [] + disable: + - fieldalignment # too strict + # Settings per analyzer. + settings: + shadow: + # Whether to be strict about shadowing; can be noisy. + # Default: false + strict: true + + nakedret: + # Make an issue if func has more lines of code than this setting, and it has naked returns. + # Default: 30 + max-func-lines: 0 + + nolintlint: + # Exclude following linters from requiring an explanation. + # Default: [] + allow-no-explanation: [ funlen, gocognit, lll ] + # Enable to require an explanation of nonzero length after each nolint directive. + # Default: false + require-explanation: true + # Enable to require nolint directives to mention the specific linter being suppressed. + # Default: false + require-specific: true + + rowserrcheck: + # database/sql is always checked + # Default: [] + packages: + - github.com/jmoiron/sqlx + + tenv: + # The option `all` will run against whole test files (`_test.go`) regardless of method/function signatures. + # Otherwise, only methods that take `*testing.T`, `*testing.B`, and `testing.TB` as arguments are checked. + # Default: false + all: true + + varcheck: + # Check usage of exported fields and variables. + # Default: false + exported-fields: false # default false # TODO: enable after fixing false positives + linters: disable-all: true enable: #- bodyclose - # - deadcode ! deprecated since v1.49.0; replaced by 'unused' #- depguard #- dogsled #- dupl @@ -38,7 +172,7 @@ linters: #- gomnd #- goprintffuncname #- gosec - #- gosimple + - gosimple - govet - ineffassign # - interfacer @@ -51,13 +185,13 @@ linters: #- nolintlint #- rowserrcheck #- scopelint - #- staticcheck - #- structcheck ! deprecated since v1.49.0; replaced by 'unused' + - staticcheck #- stylecheck - #- typecheck + - typecheck - unconvert #- unparam - unused - # - varcheck ! deprecated since v1.49.0; replaced by 'unused' - whitespace + - asciicheck + - bidichk fast: false \ No newline at end of file diff --git a/embedder/openai/openai.go b/embedder/openai/openai.go index 1cff94ce..27eade2d 100644 --- a/embedder/openai/openai.go +++ b/embedder/openai/openai.go @@ -173,9 +173,9 @@ func (o *OpenAIEmbedder) chunkText(text string, maxTokens int) ([]string, error) end = len(tokens) } - textChunk, err := o.tokensToText(tokens[i:end]) - if err != nil { - return nil, err + textChunk, errToken := o.tokensToText(tokens[i:end]) + if errToken != nil { + return nil, errToken } textChunks = append(textChunks, textChunk) diff --git a/index/index.go b/index/index.go index 0b2e817b..f7e18c69 100644 --- a/index/index.go +++ b/index/index.go @@ -39,7 +39,10 @@ func (s SearchResults) ToDocuments() []document.Document { documents := make([]document.Document, len(s)) for i, searchResult := range s { metadata := DeepCopyMetadata(searchResult.Metadata) - content := metadata[DefaultKeyContent].(string) + content, ok := metadata[DefaultKeyContent].(string) + if !ok { + content = "" + } delete(metadata, DefaultKeyContent) documents[i] = document.Document{ diff --git a/index/pinecone/pinecone.go b/index/pinecone/pinecone.go index 946fb368..5981bc88 100644 --- a/index/pinecone/pinecone.go +++ b/index/pinecone/pinecone.go @@ -129,9 +129,9 @@ func (p *Index) Add(ctx context.Context, item *index.Data) error { } if item.ID == "" { - id, err := uuid.NewUUID() - if err != nil { - return err + id, errUUID := uuid.NewUUID() + if errUUID != nil { + return errUUID } item.ID = id.String() } diff --git a/index/qdrant/qdrant.go b/index/qdrant/qdrant.go index e9c0a87c..c128adc2 100644 --- a/index/qdrant/qdrant.go +++ b/index/qdrant/qdrant.go @@ -118,9 +118,9 @@ func (q *Index) Add(ctx context.Context, item *index.Data) error { } if item.ID == "" { - id, err := uuid.NewUUID() - if err != nil { - return err + id, errUUID := uuid.NewUUID() + if errUUID != nil { + return errUUID } item.ID = id.String() } diff --git a/index/simpleVectorIndex/simpleVectorIndex.go b/index/simpleVectorIndex/simpleVectorIndex.go index e2beab36..4cc2d854 100644 --- a/index/simpleVectorIndex/simpleVectorIndex.go +++ b/index/simpleVectorIndex/simpleVectorIndex.go @@ -64,15 +64,15 @@ func (s *Index) LoadFromDocuments(ctx context.Context, documents []document.Docu texts = append(texts, document.Content) } - embeddings, err := s.embedder.Embed(ctx, texts) - if err != nil { - return fmt.Errorf("%s: %w", index.ErrInternal, err) + embeddings, errEmbed := s.embedder.Embed(ctx, texts) + if errEmbed != nil { + return fmt.Errorf("%s: %w", index.ErrInternal, errEmbed) } for j, document := range documents[i:end] { - id, err := uuid.NewUUID() - if err != nil { - return err + id, errUUID := uuid.NewUUID() + if errUUID != nil { + return errUUID } s.data = append(s.data, buildDataFromEmbeddingAndDocument(id.String(), embeddings[j], document)) } @@ -147,9 +147,9 @@ func (s *Index) Add(ctx context.Context, item *index.Data) error { } if item.ID == "" { - id, err := uuid.NewUUID() - if err != nil { - return err + id, errUUID := uuid.NewUUID() + if errUUID != nil { + return errUUID } item.ID = id.String() } diff --git a/llm/openai/function.go b/llm/openai/function.go index 4e5b8109..07ad2021 100644 --- a/llm/openai/function.go +++ b/llm/openai/function.go @@ -159,11 +159,11 @@ func callFnWithArgumentAsJSON(fn interface{}, argumentAsJSON string) (string, er // Marshal the function result to JSON if len(result) > 0 { - jsonData, err := json.Marshal(result[0].Interface()) - if err != nil { - return "", fmt.Errorf("error marshaling result: %s", err) + jsonResultData, errMarshal := json.Marshal(result[0].Interface()) + if errMarshal != nil { + return "", fmt.Errorf("error marshaling result: %s", errMarshal) } - return string(jsonData), nil + return string(jsonResultData), nil } return "", nil diff --git a/llm/openai/openai.go b/llm/openai/openai.go index 19147d4b..309914ff 100644 --- a/llm/openai/openai.go +++ b/llm/openai/openai.go @@ -259,13 +259,13 @@ func (o *OpenAI) BatchCompletionStream(ctx context.Context, callbackFn []StreamC defer stream.Close() for { - response, err := stream.Recv() - if errors.Is(err, io.EOF) { + response, errRecv := stream.Recv() + if errors.Is(errRecv, io.EOF) { break } - if err != nil { - return fmt.Errorf("%s: %w", ErrOpenAICompletion, err) + if errRecv != nil { + return fmt.Errorf("%s: %w", ErrOpenAICompletion, errRecv) } if o.usageCallback != nil { @@ -375,8 +375,8 @@ func (o *OpenAI) ChatStream(ctx context.Context, callbackFn StreamCallback, prom } for { - response, err := stream.Recv() - if errors.Is(err, io.EOF) { + response, errRecv := stream.Recv() + if errors.Is(errRecv, io.EOF) { break } diff --git a/loader/csv.go b/loader/csv.go index 24f0d004..c4f95b53 100644 --- a/loader/csv.go +++ b/loader/csv.go @@ -85,12 +85,12 @@ func (c *CSVLoader) readCSV() ([]document.Document, error) { var titles []string for { - record, err := reader.Read() - if err == io.EOF { + record, errRead := reader.Read() + if errRead == io.EOF { break } - if err != nil { - return nil, err + if errRead != nil { + return nil, errRead } if titles == nil { diff --git a/loader/directory.go b/loader/directory.go index 0e9f024a..ed12124d 100644 --- a/loader/directory.go +++ b/loader/directory.go @@ -45,9 +45,9 @@ func (d *DirectoryLoader) Load(ctx context.Context) ([]document.Document, error) err = filepath.Walk(d.dirname, func(path string, info os.FileInfo, err error) error { if err == nil && regExp.MatchString(info.Name()) { - d, err := NewTextLoader(path, nil).Load(ctx) - if err != nil { - return err + d, errLoad := NewTextLoader(path, nil).Load(ctx) + if errLoad != nil { + return errLoad } docs = append(docs, d...) diff --git a/loader/pdf_to_text.go b/loader/pdf_to_text.go index 21f8e921..b9cfc259 100644 --- a/loader/pdf_to_text.go +++ b/loader/pdf_to_text.go @@ -92,9 +92,9 @@ func (p *PDFLoader) loadDir(ctx context.Context) ([]document.Document, error) { err := filepath.Walk(p.path, func(path string, info os.FileInfo, err error) error { if err == nil && strings.HasSuffix(info.Name(), ".pdf") { - d, err := NewPDFToTextLoader(path).WithPDFToTextPath(p.pdfToTextPath).loadFile(ctx) - if err != nil { - return err + d, errLoad := NewPDFToTextLoader(path).WithPDFToTextPath(p.pdfToTextPath).loadFile(ctx) + if errLoad != nil { + return errLoad } docs = append(docs, d...) diff --git a/loader/pubmed.go b/loader/pubmed.go index f3ad6cd0..99e3b891 100644 --- a/loader/pubmed.go +++ b/loader/pubmed.go @@ -79,14 +79,14 @@ func (p *PubMedLoader) load(ctx context.Context, pubMedID string) (*document.Doc return nil, err } - var pubMedDocument pubMedDocument - err = json.Unmarshal(jsonContent, &pubMedDocument) + var pubMedDoc pubMedDocument + err = json.Unmarshal(jsonContent, &pubMedDoc) if err != nil { return nil, err } content := "" - for _, document := range pubMedDocument.Documents { + for _, document := range pubMedDoc.Documents { for _, passage := range document.Passages { content += passage.Text } diff --git a/loader/youtube-dl.go b/loader/youtube-dl.go index e3a2ee9e..1301b21c 100644 --- a/loader/youtube-dl.go +++ b/loader/youtube-dl.go @@ -148,8 +148,8 @@ func convertVTTtoPlainText(filename string) (string, error) { } } - if err := scanner.Err(); err != nil { - return "", err + if errScanner := scanner.Err(); errScanner != nil { + return "", errScanner } return plainText, nil diff --git a/pipeline/splitter.go b/pipeline/splitter.go index 36d7aebb..f5a95d88 100644 --- a/pipeline/splitter.go +++ b/pipeline/splitter.go @@ -66,8 +66,8 @@ func (s *Splitter) Run(ctx context.Context, input types.M) (types.M, error) { tube = tube.WithDecoder(s.decoder) } - output, err := tube.Run(ctx, splittedInput) - if err != nil { + output, errRun := tube.Run(ctx, splittedInput) + if errRun != nil { return } diff --git a/pipeline/sql/mysql.go b/pipeline/sql/mysql.go index a6288128..b879733f 100644 --- a/pipeline/sql/mysql.go +++ b/pipeline/sql/mysql.go @@ -28,14 +28,14 @@ func getMySQLSchema(db *sql.DB, dbName string) (string, error) { // Loop through tables and retrieve schema for rows.Next() { var tableName string - if err := rows.Scan(&tableName); err != nil { - return "", err + if errQuery := rows.Scan(&tableName); errQuery != nil { + return "", errQuery } // Retrieve column information - cols, err := db.Query(fmt.Sprintf("SHOW COLUMNS FROM %s", tableName)) - if err != nil { - return "", err + cols, errQuery := db.Query(fmt.Sprintf("SHOW COLUMNS FROM %s", tableName)) + if errQuery != nil { + return "", errQuery } defer cols.Close() @@ -52,8 +52,8 @@ func getMySQLSchema(db *sql.DB, dbName string) (string, error) { def sql.NullString extra sql.NullString ) - if err := cols.Scan(&field, &typ, &null, &key, &def, &extra); err != nil { - return "", err + if errScan := cols.Scan(&field, &typ, &null, &key, &def, &extra); errScan != nil { + return "", errScan } // Build column definition @@ -82,9 +82,9 @@ func getMySQLSchema(db *sql.DB, dbName string) (string, error) { // Retrieve foreign key information //nolint:lll - fks, err := db.Query(fmt.Sprintf("SELECT constraint_name, column_name, referenced_table_name, referenced_column_name FROM information_schema.key_column_usage WHERE table_schema = '%s' AND table_name = '%s' AND referenced_table_name IS NOT NULL", dbName, tableName)) - if err != nil { - return "", err + fks, errQuery := db.Query(fmt.Sprintf("SELECT constraint_name, column_name, referenced_table_name, referenced_column_name FROM information_schema.key_column_usage WHERE table_schema = '%s' AND table_name = '%s' AND referenced_table_name IS NOT NULL", dbName, tableName)) + if errQuery != nil { + return "", errQuery } defer fks.Close() @@ -97,8 +97,8 @@ func getMySQLSchema(db *sql.DB, dbName string) (string, error) { referencedTableName string referencedColumnName string ) - if err := fks.Scan(&constraintName, &columnName, &referencedTableName, &referencedColumnName); err != nil { - return "", err + if errScan := fks.Scan(&constraintName, &columnName, &referencedTableName, &referencedColumnName); errScan != nil { + return "", errScan } fkDef := fmt.Sprintf(" CONSTRAINT %s FOREIGN KEY (%s) REFERENCES %s (%s)", diff --git a/pipeline/sql/sqlite.go b/pipeline/sql/sqlite.go index 0b1688cd..42a0dc9c 100644 --- a/pipeline/sql/sqlite.go +++ b/pipeline/sql/sqlite.go @@ -27,14 +27,14 @@ func getSqliteSchema(db *sql.DB) (string, error) { // Loop through tables and retrieve schema for rows.Next() { var tableName string - if err := rows.Scan(&tableName); err != nil { - return "", err + if errScan := rows.Scan(&tableName); errScan != nil { + return "", errScan } // Retrieve column information - cols, err := db.Query(fmt.Sprintf("PRAGMA table_info(%s)", tableName)) - if err != nil { - return "", err + cols, errQuery := db.Query(fmt.Sprintf("PRAGMA table_info(%s)", tableName)) + if errQuery != nil { + return "", errQuery } defer cols.Close() @@ -51,8 +51,8 @@ func getSqliteSchema(db *sql.DB) (string, error) { defaultVal sql.NullString primaryKey int ) - if err := cols.Scan(&colNum, &colName, &colType, ¬Null, &defaultVal, &primaryKey); err != nil { - return "", err + if errScan := cols.Scan(&colNum, &colName, &colType, ¬Null, &defaultVal, &primaryKey); errScan != nil { + return "", errScan } // Build column definition @@ -72,9 +72,9 @@ func getSqliteSchema(db *sql.DB) (string, error) { } // Retrieve foreign key information - fks, err := db.Query(fmt.Sprintf("PRAGMA foreign_key_list(%s)", tableName)) - if err != nil { - return "", err + fks, errQuery := db.Query(fmt.Sprintf("PRAGMA foreign_key_list(%s)", tableName)) + if errQuery != nil { + return "", errQuery } defer fks.Close() @@ -92,8 +92,8 @@ func getSqliteSchema(db *sql.DB) (string, error) { match string foreignKeyDef string ) - if err := fks.Scan(&id, &seq, &table, &from, &to, &onUpdate, &onDelete, &match); err != nil { - return "", err + if errScan := fks.Scan(&id, &seq, &table, &from, &to, &onUpdate, &onDelete, &match); errScan != nil { + return "", errScan } foreignKeyDef = fmt.Sprintf(" FOREIGN KEY (%s) REFERENCES %s(%s)", from, table, to) diff --git a/transformer/dall-e.go b/transformer/dall-e.go index f3cc0774..bea9a821 100644 --- a/transformer/dall-e.go +++ b/transformer/dall-e.go @@ -113,8 +113,8 @@ func (d *DallE) transformToFile(ctx context.Context, input string) (any, error) } defer file.Close() - if err := png.Encode(file, imgData.(image.Image)); err != nil { - return nil, err + if errEncode := png.Encode(file, imgData.(image.Image)); errEncode != nil { + return nil, errEncode } return nil, nil From 441c9a628682e76631513e2f32c32f5828ebe74a Mon Sep 17 00:00:00 2001 From: Simone Vellei Date: Fri, 15 Sep 2023 01:36:56 +0200 Subject: [PATCH 4/9] chore: wrapping errors --- .golangci.yml | 47 +++++++------------- chat/chat.go | 2 +- decoder/decoder.go | 4 +- index/index.go | 3 +- index/pinecone/pinecone.go | 22 ++++----- index/qdrant/qdrant.go | 14 +++--- index/simpleVectorIndex/simpleVectorIndex.go | 16 +++---- llm/huggingface/huggingface.go | 9 ++-- llm/huggingface/textgeneration.go | 2 +- llm/openai/function.go | 12 ++--- llm/openai/openai.go | 30 ++++++------- loader/csv.go | 9 ++-- loader/directory.go | 4 +- loader/hf_image_to_text.go | 6 +-- loader/hf_speech_recognition.go | 6 +-- loader/loader.go | 6 +-- loader/text.go | 8 ++-- loader/whisper.go | 2 +- pipeline/splitter.go | 2 +- pipeline/tube.go | 8 ++-- prompt/template.go | 6 +-- 21 files changed, 102 insertions(+), 116 deletions(-) diff --git a/.golangci.yml b/.golangci.yml index d75a9362..241be6e8 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -153,45 +153,28 @@ linters-settings: linters: disable-all: true enable: - #- bodyclose - #- depguard - #- dogsled - #- dupl + # enabled by default - errcheck - - revive - #- exhaustive - #- funlen - - gas - #- gochecknoinits - - goconst - #- gocritic - #- gocyclo - #- gofmt - - goimports - # - golint - #- gomnd - #- goprintffuncname - #- gosec - gosimple - govet - ineffassign - # - interfacer - - lll - # - maligned - - megacheck - #- misspell - #- nakedret - #- noctx - #- nolintlint - #- rowserrcheck - #- scopelint - staticcheck - #- stylecheck - typecheck - - unconvert - #- unparam - unused - - whitespace + # disabled by default - asciicheck - bidichk + - bodyclose + - contextcheck + - cyclop + - dupl # Tool for code clone detection + - durationcheck # check for two durations multiplied together + - errname + - errorlint + - goconst + - goimports + - lll + - revive + - unconvert + - whitespace fast: false \ No newline at end of file diff --git a/chat/chat.go b/chat/chat.go index ff79ad40..99211555 100644 --- a/chat/chat.go +++ b/chat/chat.go @@ -81,7 +81,7 @@ func (c *Chat) ToMessages() (Messages, error) { if len(messagePromptTemplate.Prompt.String()) == 0 { err = messagePromptTemplate.Prompt.Format(types.M{}) if err != nil { - return nil, fmt.Errorf("%s: %w", ErrChatMessages, err) + return nil, fmt.Errorf("%w: %w", ErrChatMessages, err) } } message.Content = messagePromptTemplate.Prompt.String() diff --git a/decoder/decoder.go b/decoder/decoder.go index 14157336..09ebe032 100644 --- a/decoder/decoder.go +++ b/decoder/decoder.go @@ -25,7 +25,7 @@ func NewJSONDecoder() *JSONDecoder { func (d *JSONDecoder) Decode(input string) (types.M, error) { err := json.Unmarshal([]byte(input), &d.output) if err != nil { - return nil, fmt.Errorf("%s: %w", ErrDecoding, err) + return nil, fmt.Errorf("%w: %w", ErrDecoding, err) } return types.M{ @@ -47,7 +47,7 @@ func NewRegExDecoder(regex string) *RegExDecoder { func (d *RegExDecoder) Decode(input string) (types.M, error) { re, err := regexp.Compile(d.regex) if err != nil { - return nil, fmt.Errorf("%s: %w", ErrDecoding, err) + return nil, fmt.Errorf("%w: %w", ErrDecoding, err) } matches := re.FindStringSubmatch(input) diff --git a/index/index.go b/index/index.go index f7e18c69..3778ef4d 100644 --- a/index/index.go +++ b/index/index.go @@ -2,6 +2,7 @@ package index import ( "context" + "errors" "sort" "github.com/henomis/lingoose/document" @@ -10,7 +11,7 @@ import ( ) var ( - ErrInternal = "internal index error" + ErrInternal = errors.New("internal index error") ) const ( diff --git a/index/pinecone/pinecone.go b/index/pinecone/pinecone.go index 5981bc88..27137fd1 100644 --- a/index/pinecone/pinecone.go +++ b/index/pinecone/pinecone.go @@ -78,12 +78,12 @@ func (p *Index) WithAPIKeyAndEnvironment(apiKey, environment string) *Index { func (p *Index) LoadFromDocuments(ctx context.Context, documents []document.Document) error { err := p.createIndexIfRequired(ctx) if err != nil { - return fmt.Errorf("%s: %w", index.ErrInternal, err) + return fmt.Errorf("%w: %w", index.ErrInternal, err) } err = p.batchUpsert(ctx, documents) if err != nil { - return fmt.Errorf("%s: %w", index.ErrInternal, err) + return fmt.Errorf("%w: %w", index.ErrInternal, err) } return nil } @@ -91,12 +91,12 @@ func (p *Index) LoadFromDocuments(ctx context.Context, documents []document.Docu func (p *Index) IsEmpty(ctx context.Context) (bool, error) { err := p.createIndexIfRequired(ctx) if err != nil { - return true, fmt.Errorf("%s: %w", index.ErrInternal, err) + return true, fmt.Errorf("%w: %w", index.ErrInternal, err) } err = p.getProjectID(ctx) if err != nil { - return true, fmt.Errorf("%s: %w", index.ErrInternal, err) + return true, fmt.Errorf("%w: %w", index.ErrInternal, err) } req := &pineconerequest.VectorDescribeIndexStats{ @@ -107,7 +107,7 @@ func (p *Index) IsEmpty(ctx context.Context) (bool, error) { err = p.pineconeClient.VectorDescribeIndexStats(ctx, req, res) if err != nil { - return true, fmt.Errorf("%s: %w", index.ErrInternal, err) + return true, fmt.Errorf("%w: %w", index.ErrInternal, err) } namespace, ok := res.Namespaces[p.namespace] @@ -116,7 +116,7 @@ func (p *Index) IsEmpty(ctx context.Context) (bool, error) { } if namespace.VectorCount == nil { - return false, fmt.Errorf("%s: failed to get total index size", index.ErrInternal) + return false, fmt.Errorf("%w: failed to get total index size", index.ErrInternal) } return *namespace.VectorCount == 0, nil @@ -125,7 +125,7 @@ func (p *Index) IsEmpty(ctx context.Context) (bool, error) { func (p *Index) Add(ctx context.Context, item *index.Data) error { err := p.createIndexIfRequired(ctx) if err != nil { - return fmt.Errorf("%s: %w", index.ErrInternal, err) + return fmt.Errorf("%w: %w", index.ErrInternal, err) } if item.ID == "" { @@ -162,7 +162,7 @@ func (p *Index) Search(ctx context.Context, values []float64, opts ...option.Opt matches, err := p.similaritySearch(ctx, values, pineconeOptions) if err != nil { - return nil, fmt.Errorf("%s: %w", index.ErrInternal, err) + return nil, fmt.Errorf("%w: %w", index.ErrInternal, err) } searchResults := buildSearchResultsFromPineconeMatches(matches, p.includeContent) @@ -185,7 +185,7 @@ func (p *Index) Query(ctx context.Context, query string, opts ...option.Option) matches, err := p.query(ctx, query, pineconeOptions) if err != nil { - return nil, fmt.Errorf("%s: %w", index.ErrInternal, err) + return nil, fmt.Errorf("%w: %w", index.ErrInternal, err) } searchResults := buildSearchResultsFromPineconeMatches(matches, p.includeContent) @@ -209,7 +209,7 @@ func (p *Index) similaritySearch( ) ([]pineconeresponse.QueryMatch, error) { err := p.getProjectID(ctx) if err != nil { - return nil, fmt.Errorf("%s: %w", index.ErrInternal, err) + return nil, fmt.Errorf("%w: %w", index.ErrInternal, err) } includeMetadata := true @@ -338,7 +338,7 @@ func (p *Index) batchUpsert(ctx context.Context, documents []document.Document) func (p *Index) vectorUpsert(ctx context.Context, vectors []pineconerequest.Vector) error { err := p.getProjectID(ctx) if err != nil { - return fmt.Errorf("%s: %w", index.ErrInternal, err) + return fmt.Errorf("%w: %w", index.ErrInternal, err) } req := &pineconerequest.VectorUpsert{ diff --git a/index/qdrant/qdrant.go b/index/qdrant/qdrant.go index c128adc2..a39ccecc 100644 --- a/index/qdrant/qdrant.go +++ b/index/qdrant/qdrant.go @@ -80,12 +80,12 @@ func (q *Index) WithAPIKeyAndEdpoint(apiKey, endpoint string) *Index { func (q *Index) LoadFromDocuments(ctx context.Context, documents []document.Document) error { err := q.createCollectionIfRequired(ctx) if err != nil { - return fmt.Errorf("%s: %w", index.ErrInternal, err) + return fmt.Errorf("%w: %w", index.ErrInternal, err) } err = q.batchUpsert(ctx, documents) if err != nil { - return fmt.Errorf("%s: %w", index.ErrInternal, err) + return fmt.Errorf("%w: %w", index.ErrInternal, err) } return nil } @@ -93,7 +93,7 @@ func (q *Index) LoadFromDocuments(ctx context.Context, documents []document.Docu func (q *Index) IsEmpty(ctx context.Context) (bool, error) { err := q.createCollectionIfRequired(ctx) if err != nil { - return true, fmt.Errorf("%s: %w", index.ErrInternal, err) + return true, fmt.Errorf("%w: %w", index.ErrInternal, err) } res := &qdrantresponse.CollectionCollectInfo{} @@ -105,7 +105,7 @@ func (q *Index) IsEmpty(ctx context.Context) (bool, error) { res, ) if err != nil { - return true, fmt.Errorf("%s: %w", index.ErrInternal, err) + return true, fmt.Errorf("%w: %w", index.ErrInternal, err) } return res.Result.VectorsCount == 0, nil @@ -114,7 +114,7 @@ func (q *Index) IsEmpty(ctx context.Context) (bool, error) { func (q *Index) Add(ctx context.Context, item *index.Data) error { err := q.createCollectionIfRequired(ctx) if err != nil { - return fmt.Errorf("%s: %w", index.ErrInternal, err) + return fmt.Errorf("%w: %w", index.ErrInternal, err) } if item.ID == "" { @@ -147,7 +147,7 @@ func (q *Index) Search(ctx context.Context, values []float64, opts ...option.Opt matches, err := q.similaritySearch(ctx, values, qdrantOptions) if err != nil { - return nil, fmt.Errorf("%s: %w", index.ErrInternal, err) + return nil, fmt.Errorf("%w: %w", index.ErrInternal, err) } searchResults := buildSearchResultsFromQdrantMatches(matches, q.includeContent) @@ -166,7 +166,7 @@ func (q *Index) Query(ctx context.Context, query string, opts ...option.Option) matches, err := q.query(ctx, query, qdrantOptions) if err != nil { - return nil, fmt.Errorf("%s: %w", index.ErrInternal, err) + return nil, fmt.Errorf("%w: %w", index.ErrInternal, err) } searchResults := buildSearchResultsFromQdrantMatches(matches, q.includeContent) diff --git a/index/simpleVectorIndex/simpleVectorIndex.go b/index/simpleVectorIndex/simpleVectorIndex.go index 4cc2d854..2ebad31a 100644 --- a/index/simpleVectorIndex/simpleVectorIndex.go +++ b/index/simpleVectorIndex/simpleVectorIndex.go @@ -50,7 +50,7 @@ func New(name string, outputPath string, embedder index.Embedder) *Index { func (s *Index) LoadFromDocuments(ctx context.Context, documents []document.Document) error { err := s.load() if err != nil { - return fmt.Errorf("%s: %w", index.ErrInternal, err) + return fmt.Errorf("%w: %w", index.ErrInternal, err) } for i := 0; i < len(documents); i += defaultBatchSize { @@ -66,7 +66,7 @@ func (s *Index) LoadFromDocuments(ctx context.Context, documents []document.Docu embeddings, errEmbed := s.embedder.Embed(ctx, texts) if errEmbed != nil { - return fmt.Errorf("%s: %w", index.ErrInternal, errEmbed) + return fmt.Errorf("%w: %w", index.ErrInternal, errEmbed) } for j, document := range documents[i:end] { @@ -80,7 +80,7 @@ func (s *Index) LoadFromDocuments(ctx context.Context, documents []document.Docu err = s.save() if err != nil { - return fmt.Errorf("%s: %w", index.ErrInternal, err) + return fmt.Errorf("%w: %w", index.ErrInternal, err) } return nil @@ -133,7 +133,7 @@ func (s *Index) database() string { func (s *Index) IsEmpty() (bool, error) { err := s.load() if err != nil { - return true, fmt.Errorf("%s: %w", index.ErrInternal, err) + return true, fmt.Errorf("%w: %w", index.ErrInternal, err) } return len(s.data) == 0, nil @@ -143,7 +143,7 @@ func (s *Index) Add(ctx context.Context, item *index.Data) error { _ = ctx err := s.load() if err != nil { - return fmt.Errorf("%s: %w", index.ErrInternal, err) + return fmt.Errorf("%w: %w", index.ErrInternal, err) } if item.ID == "" { @@ -177,7 +177,7 @@ func (s *Index) Search(ctx context.Context, values []float64, opts ...option.Opt err := s.load() if err != nil { - return nil, fmt.Errorf("%s: %w", index.ErrInternal, err) + return nil, fmt.Errorf("%w: %w", index.ErrInternal, err) } return s.similaritySearch(ctx, values, sviOptions) @@ -194,12 +194,12 @@ func (s *Index) Query(ctx context.Context, query string, opts ...option.Option) err := s.load() if err != nil { - return nil, fmt.Errorf("%s: %w", index.ErrInternal, err) + return nil, fmt.Errorf("%w: %w", index.ErrInternal, err) } embeddings, err := s.embedder.Embed(ctx, []string{query}) if err != nil { - return nil, fmt.Errorf("%s: %w", index.ErrInternal, err) + return nil, fmt.Errorf("%w: %w", index.ErrInternal, err) } return s.similaritySearch(ctx, embeddings[0], sviOptions) diff --git a/llm/huggingface/huggingface.go b/llm/huggingface/huggingface.go index 43b68b8d..85aed2ab 100644 --- a/llm/huggingface/huggingface.go +++ b/llm/huggingface/huggingface.go @@ -2,6 +2,7 @@ package huggingface import ( "context" + "errors" "fmt" "net/http" "os" @@ -9,8 +10,8 @@ import ( const APIBaseURL = "https://api-inference.huggingface.co/models/" -const ( - ErrHuggingFaceCompletion = "huggingface completion error" +var ( + ErrHuggingFaceCompletion = errors.New("huggingface completion error") ) type Mode int @@ -122,7 +123,7 @@ func (h *HuggingFace) Completion(ctx context.Context, prompt string) (string, er } if err != nil { - return "", fmt.Errorf("%s: %w", ErrHuggingFaceCompletion, err) + return "", fmt.Errorf("%w: %w", ErrHuggingFaceCompletion, err) } return output, nil @@ -142,7 +143,7 @@ func (h *HuggingFace) BatchCompletion(ctx context.Context, prompts []string) ([] } if err != nil { - return nil, fmt.Errorf("%s: %w", ErrHuggingFaceCompletion, err) + return nil, fmt.Errorf("%w: %w", ErrHuggingFaceCompletion, err) } return outputs, nil diff --git a/llm/huggingface/textgeneration.go b/llm/huggingface/textgeneration.go index 26f42f53..83a03c66 100644 --- a/llm/huggingface/textgeneration.go +++ b/llm/huggingface/textgeneration.go @@ -64,7 +64,7 @@ func (h *HuggingFace) textgenerationCompletion(ctx context.Context, prompts []st return nil, err } if len(tgrespsRaw) != len(request.Inputs) { - return nil, fmt.Errorf("%s: expected %d responses, got %d; response=%s", ErrHuggingFaceCompletion, + return nil, fmt.Errorf("%w: expected %d responses, got %d; response=%s", ErrHuggingFaceCompletion, len(request.Inputs), len(tgrespsRaw), string(respBody)) } diff --git a/llm/openai/function.go b/llm/openai/function.go index 07ad2021..c4115fc5 100644 --- a/llm/openai/function.go +++ b/llm/openai/function.go @@ -136,18 +136,18 @@ func callFnWithArgumentAsJSON(fn interface{}, argumentAsJSON string) (string, er var argValue interface{} err := json.Unmarshal([]byte(argumentAsJSON), &argValue) if err != nil { - return "", fmt.Errorf("error unmarshaling argument: %s", err) + return "", fmt.Errorf("error unmarshaling argument: %w", err) } // Convert the argument value to the correct type argValueReflect := reflect.New(argType).Elem() jsonData, err := json.Marshal(argValue) if err != nil { - return "", fmt.Errorf("error marshaling argument: %s", err) + return "", fmt.Errorf("error marshaling argument: %w", err) } err = json.Unmarshal(jsonData, argValueReflect.Addr().Interface()) if err != nil { - return "", fmt.Errorf("error unmarshaling argument: %s", err) + return "", fmt.Errorf("error unmarshaling argument: %w", err) } // Add the argument value to the slice @@ -161,7 +161,7 @@ func callFnWithArgumentAsJSON(fn interface{}, argumentAsJSON string) (string, er if len(result) > 0 { jsonResultData, errMarshal := json.Marshal(result[0].Interface()) if errMarshal != nil { - return "", fmt.Errorf("error marshaling result: %s", errMarshal) + return "", fmt.Errorf("error marshaling result: %w", errMarshal) } return string(jsonResultData), nil } @@ -172,12 +172,12 @@ func callFnWithArgumentAsJSON(fn interface{}, argumentAsJSON string) (string, er func (o *OpenAI) functionCall(response openai.ChatCompletionResponse) (string, error) { fn, ok := o.functions[response.Choices[0].Message.FunctionCall.Name] if !ok { - return "", fmt.Errorf("%s: unknown function %s", ErrOpenAIChat, response.Choices[0].Message.FunctionCall.Name) + return "", fmt.Errorf("%w: unknown function %s", ErrOpenAIChat, response.Choices[0].Message.FunctionCall.Name) } resultAsJSON, err := callFnWithArgumentAsJSON(fn.Fn, response.Choices[0].Message.FunctionCall.Arguments) if err != nil { - return "", fmt.Errorf("%s: %w", ErrOpenAIChat, err) + return "", fmt.Errorf("%w: %w", ErrOpenAIChat, err) } o.calledFunctionName = &fn.Name diff --git a/llm/openai/openai.go b/llm/openai/openai.go index 309914ff..9307976c 100644 --- a/llm/openai/openai.go +++ b/llm/openai/openai.go @@ -174,8 +174,8 @@ func (o *OpenAI) Completion(ctx context.Context, prompt string) (string, error) cacheResult, err = o.cache.Get(ctx, prompt) if err == nil { return strings.Join(cacheResult.Answer, "\n"), nil - } else if err != cache.ErrCacheMiss { - return "", fmt.Errorf("%s: %w", ErrOpenAICompletion, err) + } else if !errors.Is(err, cache.ErrCacheMiss) { + return "", fmt.Errorf("%w: %w", ErrOpenAICompletion, err) } } @@ -187,7 +187,7 @@ func (o *OpenAI) Completion(ctx context.Context, prompt string) (string, error) if o.cache != nil { err = o.cache.Set(ctx, cacheResult.Embedding, outputs[0]) if err != nil { - return "", fmt.Errorf("%s: %w", ErrOpenAICompletion, err) + return "", fmt.Errorf("%w: %w", ErrOpenAICompletion, err) } } @@ -210,7 +210,7 @@ func (o *OpenAI) BatchCompletion(ctx context.Context, prompts []string) ([]strin ) if err != nil { - return nil, fmt.Errorf("%s: %w", ErrOpenAICompletion, err) + return nil, fmt.Errorf("%w: %w", ErrOpenAICompletion, err) } if o.usageCallback != nil { @@ -218,7 +218,7 @@ func (o *OpenAI) BatchCompletion(ctx context.Context, prompts []string) ([]strin } if len(response.Choices) == 0 { - return nil, fmt.Errorf("%s: no choices returned", ErrOpenAICompletion) + return nil, fmt.Errorf("%w: no choices returned", ErrOpenAICompletion) } var outputs []string @@ -253,7 +253,7 @@ func (o *OpenAI) BatchCompletionStream(ctx context.Context, callbackFn []StreamC }, ) if err != nil { - return fmt.Errorf("%s: %w", ErrOpenAICompletion, err) + return fmt.Errorf("%w: %w", ErrOpenAICompletion, err) } defer stream.Close() @@ -265,7 +265,7 @@ func (o *OpenAI) BatchCompletionStream(ctx context.Context, callbackFn []StreamC } if errRecv != nil { - return fmt.Errorf("%s: %w", ErrOpenAICompletion, errRecv) + return fmt.Errorf("%w: %w", ErrOpenAICompletion, errRecv) } if o.usageCallback != nil { @@ -273,7 +273,7 @@ func (o *OpenAI) BatchCompletionStream(ctx context.Context, callbackFn []StreamC } if len(response.Choices) == 0 { - return fmt.Errorf("%s: no choices returned", ErrOpenAICompletion) + return fmt.Errorf("%w: no choices returned", ErrOpenAICompletion) } for _, choice := range response.Choices { @@ -294,7 +294,7 @@ func (o *OpenAI) BatchCompletionStream(ctx context.Context, callbackFn []StreamC func (o *OpenAI) Chat(ctx context.Context, prompt *chat.Chat) (string, error) { messages, err := buildMessages(prompt) if err != nil { - return "", fmt.Errorf("%s: %w", ErrOpenAIChat, err) + return "", fmt.Errorf("%w: %w", ErrOpenAIChat, err) } chatCompletionRequest := openai.ChatCompletionRequest{ @@ -317,7 +317,7 @@ func (o *OpenAI) Chat(ctx context.Context, prompt *chat.Chat) (string, error) { ) if err != nil { - return "", fmt.Errorf("%s: %w", ErrOpenAIChat, err) + return "", fmt.Errorf("%w: %w", ErrOpenAIChat, err) } if o.usageCallback != nil { @@ -325,7 +325,7 @@ func (o *OpenAI) Chat(ctx context.Context, prompt *chat.Chat) (string, error) { } if len(response.Choices) == 0 { - return "", fmt.Errorf("%s: no choices returned", ErrOpenAIChat) + return "", fmt.Errorf("%w: no choices returned", ErrOpenAIChat) } content := response.Choices[0].Message.Content @@ -340,7 +340,7 @@ func (o *OpenAI) Chat(ctx context.Context, prompt *chat.Chat) (string, error) { content, err = o.functionCall(response) if err != nil { - return "", fmt.Errorf("%s: %w", ErrOpenAIChat, err) + return "", fmt.Errorf("%w: %w", ErrOpenAIChat, err) } } @@ -355,7 +355,7 @@ func (o *OpenAI) Chat(ctx context.Context, prompt *chat.Chat) (string, error) { func (o *OpenAI) ChatStream(ctx context.Context, callbackFn StreamCallback, prompt *chat.Chat) error { messages, err := buildMessages(prompt) if err != nil { - return fmt.Errorf("%s: %w", ErrOpenAIChat, err) + return fmt.Errorf("%w: %w", ErrOpenAIChat, err) } stream, err := o.openAIClient.CreateChatCompletionStream( @@ -371,7 +371,7 @@ func (o *OpenAI) ChatStream(ctx context.Context, callbackFn StreamCallback, prom }, ) if err != nil { - return fmt.Errorf("%s: %w", ErrOpenAIChat, err) + return fmt.Errorf("%w: %w", ErrOpenAIChat, err) } for { @@ -386,7 +386,7 @@ func (o *OpenAI) ChatStream(ctx context.Context, callbackFn StreamCallback, prom // } if len(response.Choices) == 0 { - return fmt.Errorf("%s: no choices returned", ErrOpenAIChat) + return fmt.Errorf("%w: no choices returned", ErrOpenAIChat) } content := response.Choices[0].Delta.Content diff --git a/loader/csv.go b/loader/csv.go index c4f95b53..b38c972e 100644 --- a/loader/csv.go +++ b/loader/csv.go @@ -3,6 +3,7 @@ package loader import ( "context" "encoding/csv" + "errors" "fmt" "io" "log" @@ -51,7 +52,7 @@ func (c *CSVLoader) Load(ctx context.Context) ([]document.Document, error) { documents, err := c.readCSV() if err != nil { - return nil, fmt.Errorf("%s: %w", ErrorInternal, err) + return nil, fmt.Errorf("%w: %w", ErrInternal, err) } return documents, nil @@ -60,11 +61,11 @@ func (c *CSVLoader) Load(ctx context.Context) ([]document.Document, error) { func (c *CSVLoader) validate() error { fileStat, err := os.Stat(c.filename) if err != nil { - return fmt.Errorf("%s: %w", ErrorInternal, err) + return fmt.Errorf("%w: %w", ErrInternal, err) } if fileStat.IsDir() { - return fmt.Errorf("%s: %w", ErrorInternal, os.ErrNotExist) + return fmt.Errorf("%w: %w", ErrInternal, os.ErrNotExist) } return nil @@ -86,7 +87,7 @@ func (c *CSVLoader) readCSV() ([]document.Document, error) { for { record, errRead := reader.Read() - if errRead == io.EOF { + if errors.Is(errRead, io.EOF) { break } if errRead != nil { diff --git a/loader/directory.go b/loader/directory.go index ed12124d..ee93cca0 100644 --- a/loader/directory.go +++ b/loader/directory.go @@ -68,11 +68,11 @@ func (d *DirectoryLoader) Load(ctx context.Context) ([]document.Document, error) func (d *DirectoryLoader) validate() error { fileStat, err := os.Stat(d.dirname) if err != nil { - return fmt.Errorf("%s: %w", ErrorInternal, err) + return fmt.Errorf("%w: %w", ErrInternal, err) } if !fileStat.IsDir() { - return fmt.Errorf("%s: %w", ErrorInternal, os.ErrNotExist) + return fmt.Errorf("%w: %w", ErrInternal, os.ErrNotExist) } return nil diff --git a/loader/hf_image_to_text.go b/loader/hf_image_to_text.go index ff6bea3a..b0ad4a8c 100644 --- a/loader/hf_image_to_text.go +++ b/loader/hf_image_to_text.go @@ -58,18 +58,18 @@ func (h *HFImageToText) WithTextSplitter(textSplitter TextSplitter) *HFImageToTe func (h *HFImageToText) Load(ctx context.Context) ([]document.Document, error) { err := isFile(h.mediaFile) if err != nil { - return nil, fmt.Errorf("%s: %w", ErrorInternal, err) + return nil, fmt.Errorf("%w: %w", ErrInternal, err) } responseBytes, err := hfMediaHTTPCall(ctx, h.token, h.model, h.mediaFile) if err != nil { - return nil, fmt.Errorf("%s: %w", ErrorInternal, err) + return nil, fmt.Errorf("%w: %w", ErrInternal, err) } responses := []*hfImageToTextResponse{} err = json.Unmarshal(responseBytes, &responses) if err != nil { - return nil, fmt.Errorf("%s: %w", ErrorInternal, err) + return nil, fmt.Errorf("%w: %w", ErrInternal, err) } var documents []document.Document diff --git a/loader/hf_speech_recognition.go b/loader/hf_speech_recognition.go index 23505923..3943795c 100644 --- a/loader/hf_speech_recognition.go +++ b/loader/hf_speech_recognition.go @@ -53,18 +53,18 @@ func (h *HFSpeechRecognition) WithTextSplitter(textSplitter TextSplitter) *HFSpe func (h *HFSpeechRecognition) Load(ctx context.Context) ([]document.Document, error) { err := isFile(h.mediaFile) if err != nil { - return nil, fmt.Errorf("%s: %w", ErrorInternal, err) + return nil, fmt.Errorf("%w: %w", ErrInternal, err) } responseBytes, err := hfMediaHTTPCall(ctx, h.token, h.model, h.mediaFile) if err != nil { - return nil, fmt.Errorf("%s: %w", ErrorInternal, err) + return nil, fmt.Errorf("%w: %w", ErrInternal, err) } response := hfSpeechRecognitionResponse{} err = json.Unmarshal(responseBytes, &response) if err != nil { - return nil, fmt.Errorf("%s: %w", ErrorInternal, err) + return nil, fmt.Errorf("%w: %w", ErrInternal, err) } var documents []document.Document diff --git a/loader/loader.go b/loader/loader.go index 5c647244..cfa8c6ce 100644 --- a/loader/loader.go +++ b/loader/loader.go @@ -8,7 +8,7 @@ import ( ) var ( - ErrorInternal = fmt.Errorf("internal error") + ErrInternal = fmt.Errorf("internal error") ) const ( @@ -26,11 +26,11 @@ type Loader struct { func isFile(filename string) error { fileStat, err := os.Stat(filename) if err != nil { - return fmt.Errorf("%s: %w", ErrorInternal, err) + return fmt.Errorf("%w: %w", ErrInternal, err) } if fileStat.IsDir() { - return fmt.Errorf("%s: %w", ErrorInternal, os.ErrNotExist) + return fmt.Errorf("%w: %w", ErrInternal, os.ErrNotExist) } return nil diff --git a/loader/text.go b/loader/text.go index 0f60cbe2..4ef42832 100644 --- a/loader/text.go +++ b/loader/text.go @@ -37,7 +37,7 @@ func (t *TextLoader) Load(ctx context.Context) ([]document.Document, error) { text, err := os.ReadFile(t.filename) if err != nil { - return nil, fmt.Errorf("%s: %w", ErrorInternal, err) + return nil, fmt.Errorf("%w: %w", ErrInternal, err) } documents := []document.Document{ @@ -60,7 +60,7 @@ func (t *TextLoader) validate() error { } else { _, ok := t.metadata[SourceMetadataKey] if ok { - return fmt.Errorf("%s: metadata key %s is reserved", ErrorInternal, SourceMetadataKey) + return fmt.Errorf("%w: metadata key %s is reserved", ErrInternal, SourceMetadataKey) } } @@ -68,11 +68,11 @@ func (t *TextLoader) validate() error { fileStat, err := os.Stat(t.filename) if err != nil { - return fmt.Errorf("%s: %w", ErrorInternal, err) + return fmt.Errorf("%w: %w", ErrInternal, err) } if fileStat.IsDir() { - return fmt.Errorf("%s: %w", ErrorInternal, os.ErrNotExist) + return fmt.Errorf("%w: %w", ErrInternal, os.ErrNotExist) } return nil diff --git a/loader/whisper.go b/loader/whisper.go index 0edf2dff..a94bee70 100644 --- a/loader/whisper.go +++ b/loader/whisper.go @@ -43,7 +43,7 @@ func (w *WhisperLoader) Load(ctx context.Context) ([]document.Document, error) { } resp, err := w.openAIClient.CreateTranscription(ctx, req) if err != nil { - return nil, fmt.Errorf("%s: %w", ErrorInternal, err) + return nil, fmt.Errorf("%w: %w", ErrInternal, err) } documents := []document.Document{ diff --git a/pipeline/splitter.go b/pipeline/splitter.go index f5a95d88..c320988e 100644 --- a/pipeline/splitter.go +++ b/pipeline/splitter.go @@ -46,7 +46,7 @@ func (s *Splitter) WithMemory(name string, memory Memory) *Splitter { func (s *Splitter) Run(ctx context.Context, input types.M) (types.M, error) { splittedInputs, err := s.splitterFn(input) if err != nil { - return nil, fmt.Errorf("%s: %w", ErrSplitFunction, err) + return nil, fmt.Errorf("%w: %w", ErrSplitFunction, err) } var wg sync.WaitGroup diff --git a/pipeline/tube.go b/pipeline/tube.go index 244272f6..aae95800 100644 --- a/pipeline/tube.go +++ b/pipeline/tube.go @@ -56,7 +56,7 @@ func (t *Tube) Run(ctx context.Context, input types.M) (types.M, error) { input, err := structToMap(input) if err != nil { - return nil, fmt.Errorf("%s: %w", ErrDecoding, err) + return nil, fmt.Errorf("%w: %w", ErrDecoding, err) } if t.memory != nil { @@ -65,12 +65,12 @@ func (t *Tube) Run(ctx context.Context, input types.M) (types.M, error) { response, err := t.executeLLM(ctx, input) if err != nil { - return nil, fmt.Errorf("%s: %w", ErrLLMExecution, err) + return nil, fmt.Errorf("%w: %w", ErrLLMExecution, err) } decodedOutput, err := t.decoder.Decode(response) if err != nil { - return nil, fmt.Errorf("%s: %w", ErrDecoding, err) + return nil, fmt.Errorf("%w: %w", ErrDecoding, err) } if t.memory != nil { @@ -176,7 +176,7 @@ func structToMap(obj interface{}) (types.M, error) { genericMap := types.M{} err := mapstructure.Decode(obj, &genericMap) if err != nil { - return nil, fmt.Errorf("%s: %w", ErrDecoding, err) + return nil, fmt.Errorf("%w: %w", ErrDecoding, err) } return genericMap, nil diff --git a/prompt/template.go b/prompt/template.go index e50e13b8..b0eeafaa 100644 --- a/prompt/template.go +++ b/prompt/template.go @@ -44,7 +44,7 @@ func (t *Template) Format(input types.M) error { input, err = structToMap(input) if err != nil { - return fmt.Errorf("%s: %w", ErrDecoding, err) + return fmt.Errorf("%w: %w", ErrDecoding, err) } overallMap := mergeMaps(t.input.(types.M), input) @@ -52,7 +52,7 @@ func (t *Template) Format(input types.M) error { var buffer bytes.Buffer err = t.templateEngine.Execute(&buffer, overallMap) if err != nil { - return fmt.Errorf("%s: %w", ErrTemplateEngine, err) + return fmt.Errorf("%w: %w", ErrTemplateEngine, err) } t.value = buffer.String() @@ -71,7 +71,7 @@ func (t *Template) initTemplateEngine() error { templateEngine, err := texttemplate.New("prompt").Option("missingkey=zero").Parse(t.template) if err != nil { - return fmt.Errorf("%s: %w", ErrTemplateEngine, err) + return fmt.Errorf("%w: %w", ErrTemplateEngine, err) } t.templateEngine = templateEngine From 8b738911eb3d83a8fd1ee13c96dcfad0a8057325 Mon Sep 17 00:00:00 2001 From: Simone Vellei Date: Fri, 15 Sep 2023 01:43:40 +0200 Subject: [PATCH 5/9] chore: add gocognit --- .golangci.yml | 39 +++++++++++++++++++++++++++++++++--- pipeline/pipeline.go | 2 ++ pipeline/sql/mysql.go | 1 + pipeline/sql/sqlite.go | 1 + textsplitter/textsplitter.go | 1 + 5 files changed, 41 insertions(+), 3 deletions(-) diff --git a/.golangci.yml b/.golangci.yml index 241be6e8..5939032d 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -167,14 +167,47 @@ linters: - bodyclose - contextcheck - cyclop - - dupl # Tool for code clone detection - - durationcheck # check for two durations multiplied together + - dupl + - durationcheck - errname - errorlint + - execinquery + - exhaustive + - exportloopref + # - forbidigo + - funlen + # - gochecknoinits + - gocognit - goconst - goimports - lll - revive - unconvert - whitespace - fast: false \ No newline at end of file + fast: false + + +issues: + # Maximum count of issues with the same text. + # Set to 0 to disable. + # Default: 3 + max-same-issues: 50 + + exclude-rules: + - source: "^//\\s*go:generate\\s" + linters: [ lll ] + - source: "(noinspection|TODO)" + linters: [ godot ] + - source: "//noinspection" + linters: [ gocritic ] + - source: "^\\s+if _, ok := err\\.\\([^.]+\\.InternalError\\); ok {" + linters: [ errorlint ] + - path: "_test\\.go" + linters: + - bodyclose + - dupl + - funlen + - goconst + - gosec + - noctx + - wrapcheck \ No newline at end of file diff --git a/pipeline/pipeline.go b/pipeline/pipeline.go index 2a4eec7e..a9c96490 100644 --- a/pipeline/pipeline.go +++ b/pipeline/pipeline.go @@ -73,6 +73,8 @@ func (p *Pipeline) WithPostCallbacks(callbacks ...Callback) *Pipeline { } // Run chains the steps of the pipeline and returns the output of the last step. +// +//nolint:gocognit func (p Pipeline) Run(ctx context.Context, input types.M) (types.M, error) { var err error currentTube := 0 diff --git a/pipeline/sql/mysql.go b/pipeline/sql/mysql.go index b879733f..b83375ae 100644 --- a/pipeline/sql/mysql.go +++ b/pipeline/sql/mysql.go @@ -14,6 +14,7 @@ const mysqlDataSourcePromptTemplate = "\n" + "Pay attention to use only the column names you can see in the tables below. Be careful to not query for columns that do not exist. Also, pay attention to which column is in which table.\n" + "Pay attention to use CURDATE() function to get the current date, if the question involves \"today\"." +//nolint:funlen,gocognit func getMySQLSchema(db *sql.DB, dbName string) (string, error) { var schema string diff --git a/pipeline/sql/sqlite.go b/pipeline/sql/sqlite.go index 42a0dc9c..119f66c1 100644 --- a/pipeline/sql/sqlite.go +++ b/pipeline/sql/sqlite.go @@ -14,6 +14,7 @@ Never query for all columns from a table. You must query only the columns that a Pay attention to use only the column names you can see in the tables below. Be careful to not query for columns that do not exist. Also, pay attention to which column is in which table. Pay attention to use date('now') function to get the current date, if the question involves "today".` +//nolint:funlen,gocognit func getSqliteSchema(db *sql.DB) (string, error) { var schema string diff --git a/textsplitter/textsplitter.go b/textsplitter/textsplitter.go index fc0e7fc9..fc2cafeb 100644 --- a/textsplitter/textsplitter.go +++ b/textsplitter/textsplitter.go @@ -13,6 +13,7 @@ type TextSplitter struct { lengthFunction LenFunction } +//nolint:gocognit func (t *TextSplitter) mergeSplits(splits []string, separator string) []string { docs := make([]string, 0) currentDoc := make([]string, 0) From 81cf651d4ee0f4e9da85347728c4372b5d69c7b2 Mon Sep 17 00:00:00 2001 From: Simone Vellei Date: Fri, 15 Sep 2023 01:48:57 +0200 Subject: [PATCH 6/9] chore: add nonilerr --- .golangci.yml | 11 +++++++++++ embedder/huggingface/http.go | 2 ++ llm/huggingface/http.go | 2 ++ loader/hf_image_to_text.go | 2 ++ transformer/visual-question-answering.go | 2 ++ 5 files changed, 19 insertions(+) diff --git a/.golangci.yml b/.golangci.yml index 5939032d..4f9e7f83 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -179,8 +179,19 @@ linters: # - gochecknoinits - gocognit - goconst + # - gocritic this can be enabled + - gocyclo + # - godot - goimports + # - gomnd this can be enabled + - gomoddirectives + - gomodguard + - goprintffuncname + - gosec - lll + - makezero + - nestif + - nilerr - revive - unconvert - whitespace diff --git a/embedder/huggingface/http.go b/embedder/huggingface/http.go index 0634072d..c07097dc 100644 --- a/embedder/huggingface/http.go +++ b/embedder/huggingface/http.go @@ -56,6 +56,7 @@ func checkRespForError(respJSON []byte) error { apiErr := apiError{} err := json.Unmarshal(buf, &apiErr) if err != nil { + //nolint:nilerr return nil } if apiErr.Error != "" { @@ -69,6 +70,7 @@ func checkRespForError(respJSON []byte) error { apiErrs := apiErrors{} err := json.Unmarshal(buf, &apiErrs) if err != nil { + //nolint:nilerr return nil } if apiErrs.Errors != nil { diff --git a/llm/huggingface/http.go b/llm/huggingface/http.go index 00c229ac..126574de 100644 --- a/llm/huggingface/http.go +++ b/llm/huggingface/http.go @@ -54,6 +54,7 @@ func checkRespForError(respJSON []byte) error { apiErr := apiError{} err := json.Unmarshal(buf, &apiErr) if err != nil { + //nolint:nilerr return nil } if apiErr.Error != "" { @@ -67,6 +68,7 @@ func checkRespForError(respJSON []byte) error { apiErrs := apiErrors{} err := json.Unmarshal(buf, &apiErrs) if err != nil { + //nolint:nilerr return nil } if apiErrs.Errors != nil { diff --git a/loader/hf_image_to_text.go b/loader/hf_image_to_text.go index b0ad4a8c..133525c5 100644 --- a/loader/hf_image_to_text.go +++ b/loader/hf_image_to_text.go @@ -145,6 +145,7 @@ func hfCheckHTTPResponse(respJSON []byte) error { apiErr := apiError{} err := json.Unmarshal(buf, &apiErr) if err != nil { + //nolint:nilerr return nil } if apiErr.Error != "" { @@ -158,6 +159,7 @@ func hfCheckHTTPResponse(respJSON []byte) error { apiErrs := apiErrors{} err := json.Unmarshal(buf, &apiErrs) if err != nil { + //nolint:nilerr return nil } if apiErrs.Errors != nil { diff --git a/transformer/visual-question-answering.go b/transformer/visual-question-answering.go index 7d942549..288308d7 100644 --- a/transformer/visual-question-answering.go +++ b/transformer/visual-question-answering.go @@ -140,6 +140,7 @@ func hfCheckHTTPResponse(respJSON []byte) error { apiErr := apiError{} err := json.Unmarshal(buf, &apiErr) if err != nil { + //nolint:nilerr return nil } if apiErr.Error != "" { @@ -153,6 +154,7 @@ func hfCheckHTTPResponse(respJSON []byte) error { apiErrs := apiErrors{} err := json.Unmarshal(buf, &apiErrs) if err != nil { + //nolint:nilerr return nil } if apiErrs.Errors != nil { From 8e8cce7623190dc57190638957607a6d00553a8c Mon Sep 17 00:00:00 2001 From: Simone Vellei Date: Fri, 15 Sep 2023 01:54:34 +0200 Subject: [PATCH 7/9] chore: add rows check --- .golangci.yml | 6 ++++++ pipeline/sql/mysql.go | 10 ++++++++++ pipeline/sql/sql.go | 3 +++ pipeline/sql/sqlite.go | 10 ++++++++++ 4 files changed, 29 insertions(+) diff --git a/.golangci.yml b/.golangci.yml index 4f9e7f83..6926bebe 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -192,7 +192,13 @@ linters: - makezero - nestif - nilerr + - nilnil + # - nolintlint this can be enabled + - nosprintfhostport + - predeclared + - promlinter - revive + - rowserrcheck - unconvert - whitespace fast: false diff --git a/pipeline/sql/mysql.go b/pipeline/sql/mysql.go index b83375ae..3ce15d63 100644 --- a/pipeline/sql/mysql.go +++ b/pipeline/sql/mysql.go @@ -24,6 +24,10 @@ func getMySQLSchema(db *sql.DB, dbName string) (string, error) { if err != nil { return "", err } + err = rows.Err() + if err != nil { + return "", err + } defer rows.Close() // Loop through tables and retrieve schema @@ -38,6 +42,9 @@ func getMySQLSchema(db *sql.DB, dbName string) (string, error) { if errQuery != nil { return "", errQuery } + if errRows := cols.Err(); errRows != nil { + return "", errRows + } defer cols.Close() // Build CREATE TABLE statement @@ -87,6 +94,9 @@ func getMySQLSchema(db *sql.DB, dbName string) (string, error) { if errQuery != nil { return "", errQuery } + if errRows := fks.Err(); errRows != nil { + return "", errRows + } defer fks.Close() // Build foreign key definitions diff --git a/pipeline/sql/sql.go b/pipeline/sql/sql.go index 0e5c4fbd..f97ac793 100644 --- a/pipeline/sql/sql.go +++ b/pipeline/sql/sql.go @@ -241,6 +241,9 @@ func getSQLResult(db *sql.DB, query string) (string, error) { if err != nil { return "", err } + if err = rows.Err(); err != nil { + return "", err + } defer rows.Close() content := "" diff --git a/pipeline/sql/sqlite.go b/pipeline/sql/sqlite.go index 119f66c1..ee2adc0a 100644 --- a/pipeline/sql/sqlite.go +++ b/pipeline/sql/sqlite.go @@ -23,6 +23,10 @@ func getSqliteSchema(db *sql.DB) (string, error) { if err != nil { return "", err } + err = rows.Err() + if err != nil { + return "", err + } defer rows.Close() // Loop through tables and retrieve schema @@ -37,6 +41,9 @@ func getSqliteSchema(db *sql.DB) (string, error) { if errQuery != nil { return "", errQuery } + if errRows := cols.Err(); errRows != nil { + return "", errRows + } defer cols.Close() // Build CREATE TABLE statement @@ -77,6 +84,9 @@ func getSqliteSchema(db *sql.DB) (string, error) { if errQuery != nil { return "", errQuery } + if errRows := fks.Err(); errRows != nil { + return "", errRows + } defer fks.Close() // Build foreign key definitions From 4f36e29e9e4a25b8a491bba3f4ef96f73d908af7 Mon Sep 17 00:00:00 2001 From: Simone Vellei Date: Fri, 15 Sep 2023 01:59:57 +0200 Subject: [PATCH 8/9] chore: good lint --- .golangci.yml | 36 +++++++----------------------------- memory/ram/ram.go | 2 +- 2 files changed, 8 insertions(+), 30 deletions(-) diff --git a/.golangci.yml b/.golangci.yml index 6926bebe..abd0e282 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -83,24 +83,6 @@ linters-settings: - strconv.ParseInt - strconv.ParseUint - gomodguard: - blocked: - # List of blocked modules. - # Default: [] - modules: - - github.com/golang/protobuf: - recommendations: - - google.golang.org/protobuf - reason: "see https://developers.google.com/protocol-buffers/docs/reference/go/faq#modules" - - github.com/satori/go.uuid: - recommendations: - - github.com/google/uuid - reason: "satori's package is not maintained" - - github.com/gofrs/uuid: - recommendations: - - github.com/google/uuid - reason: "see recommendation from dev-infra team: https://confluence.gtforge.com/x/gQI6Aw" - govet: # Enable all analyzers. # Default: false @@ -117,11 +99,6 @@ linters-settings: # Default: false strict: true - nakedret: - # Make an issue if func has more lines of code than this setting, and it has naked returns. - # Default: 30 - max-func-lines: 0 - nolintlint: # Exclude following linters from requiring an explanation. # Default: [] @@ -133,12 +110,6 @@ linters-settings: # Default: false require-specific: true - rowserrcheck: - # database/sql is always checked - # Default: [] - packages: - - github.com/jmoiron/sqlx - tenv: # The option `all` will run against whole test files (`_test.go`) regardless of method/function signatures. # Otherwise, only methods that take `*testing.T`, `*testing.B`, and `testing.TB` as arguments are checked. @@ -199,7 +170,14 @@ linters: - promlinter - revive - rowserrcheck + - sqlclosecheck + - stylecheck + - tenv + # - testpackage + - tparallel - unconvert + - unparam + - wastedassign - whitespace fast: false diff --git a/memory/ram/ram.go b/memory/ram/ram.go index 3ebb232a..a30c4b2d 100644 --- a/memory/ram/ram.go +++ b/memory/ram/ram.go @@ -11,7 +11,7 @@ var ( ErrObjectNotFound = errors.New("object not found") ) -//nolint:revive +//nolint:revive,stylecheck type Ram struct { memory types.M } From 95301060a0bd273a1feee990c4767c0ba80aa36e Mon Sep 17 00:00:00 2001 From: Simone Vellei Date: Fri, 15 Sep 2023 02:07:07 +0200 Subject: [PATCH 9/9] chore: fix --- textsplitter/textsplitter.go | 2 ++ transformer/dall-e.go | 8 +++++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/textsplitter/textsplitter.go b/textsplitter/textsplitter.go index fc2cafeb..1f3492ff 100644 --- a/textsplitter/textsplitter.go +++ b/textsplitter/textsplitter.go @@ -31,7 +31,9 @@ func (t *TextSplitter) mergeSplits(splits []string, separator string) []string { docs = append(docs, doc) } for (total > t.chunkOverlap) || (getSLen(currentDoc, separator, 0) > t.chunkSize) && total > 0 { + //nolint:gosec total -= t.lengthFunction(currentDoc[0]) + getSLen(currentDoc, separator, 1) + //nolint:gosec currentDoc = currentDoc[1:] } } diff --git a/transformer/dall-e.go b/transformer/dall-e.go index bea9a821..1c38aa74 100644 --- a/transformer/dall-e.go +++ b/transformer/dall-e.go @@ -113,11 +113,13 @@ func (d *DallE) transformToFile(ctx context.Context, input string) (any, error) } defer file.Close() - if errEncode := png.Encode(file, imgData.(image.Image)); errEncode != nil { - return nil, errEncode + err = png.Encode(file, imgData.(image.Image)) + if err != nil { + return nil, err } - return nil, nil + var output interface{} + return output, nil } func (d *DallE) transformToImage(ctx context.Context, input string) (any, error) {