mega update - Claude AI + vision
radiantspace committed Jul 7, 2024
1 parent 45bdd92 commit 659ddf3
Showing 26 changed files with 1,304 additions and 158 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/deploy-to-doks.yaml
@@ -70,6 +70,7 @@ jobs:
BACKEND_BASE_URL: ${{ secrets.BACKEND_BASE_URL }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
CLAUDE_API_KEY: ${{ secrets.CLAUDE_API_KEY }}
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
SLACK_SIGNING_SECRET: ${{ secrets.SLACK_SIGNING_SECRET }}
STRIPE_ENDPOINT_SECRET: ${{ secrets.STRIPE_ENDPOINT_SECRET }}
@@ -83,6 +84,7 @@ jobs:
echo BACKEND_BASE_URL=$BACKEND_BASE_URL > environment-properties.env
echo OPENAI_API_KEY=$OPENAI_API_KEY >> environment-properties.env
echo FIREWORKS_API_KEY=$FIREWORKS_API_KEY >> environment-properties.env
echo CLAUDE_API_KEY=$CLAUDE_API_KEY >> environment-properties.env
echo SLACK_BOT_TOKEN=$SLACK_BOT_TOKEN >> environment-properties.env
echo SLACK_SIGNING_SECRET=$SLACK_SIGNING_SECRET >> environment-properties.env
echo STRIPE_ENDPOINT_SECRET=$STRIPE_ENDPOINT_SECRET >> environment-properties.env
13 changes: 8 additions & 5 deletions README.md
@@ -41,10 +41,10 @@ in Slack:
- [x] Video/Audio transcription
- [x] Video/Audio summary
- [x] Voice response (OpenAI TTS)
-- [x] Threads, i.e. context awareness and memory (OpenAI Assistant Threads)
-- [ ] Image recognition
-- [x] Image generation
+- [x] Threads, i.e. context awareness and memory (OpenAI Assistant Threads, MongoDB persistent threads)
+- [x] Image recognition
+- [x] Image generation (OpenAI DALL-E)
+- [ ] Document/PDF reading and reasoning

## Telegram Features

@@ -76,7 +76,7 @@ https://github.com/radiantspace/talk2robots/assets/8243681/3cfd47c4-55ed-4eb3-9f
## Slack Features

- [x] Chat in DM, private and public channels and/or threads
-- [x] Chat with GPT-3.5 `/chatgpt`. The bot remembers the context of the conversation until you say `/clear`.
+- [x] Chat `/chatgpt`. The bot remembers the context of the conversation until you say `/clear`.
- [x] Use `/grammar` mode to correct grammar only; you can also add :eyeglasses: emoji to a message to get a grammar correction reply
- [x] Summarize message thread by adding :memo: emoji to a message
- [ ] Use `/teacher` mode to correct and explain grammar
@@ -102,6 +102,8 @@ https://github.com/radiantspace/talk2robots/assets/8243681/c5805c11-716b-47ab-9f
- [x] Get user info `/user`
- [x] Get users count `/userscount`
- [x] Get users count for a subscription `/usersforsubscription`
- [x] `/sendmessagetoauser` to send a message to a user
- [x] `/sendmessagetousers` to send a message to users (e.g. to notify about maintenance or marketing)


## 🏗️ Architecture overview
@@ -135,6 +137,7 @@ graph LR
A[Backend] <--> D[OpenAI API]
A[Backend] <--> E[FireworksAI API]
A[Backend] <--> I[ClaudeAI API]
A[Backend] -.-> P[DataDog]
A[Backend] <-.-> N[Stripe]
8 changes: 8 additions & 0 deletions backend/app/ai/ai.go
@@ -59,3 +59,11 @@ func IsFireworksAI(model models.Engine) bool {

return false
}

func IsClaudeAI(model models.Engine) bool {
if model == models.Sonet35 || model == models.Haiku3 || model == models.Opus3 {
return true
}

return false
}
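For orientation, here is a hedged sketch (not part of this commit) of how these predicates partition the engine space; `providerFor` is a hypothetical helper, while `IsFireworksAI`/`IsClaudeAI` are the functions above:

```go
// providerFor is a hypothetical helper illustrating the routing the
// predicates above enable; anything unmatched falls through to OpenAI.
func providerFor(model models.Engine) string {
	switch {
	case IsFireworksAI(model):
		return "fireworks"
	case IsClaudeAI(model):
		return "anthropic"
	default:
		return "openai"
	}
}
```

`ChatComplete` and `ChatCompleteStreaming` in `chat.go` below apply exactly this pattern, branching to the Claude path before the OpenAI-compatible one.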
239 changes: 227 additions & 12 deletions backend/app/ai/chat.go
@@ -11,6 +11,7 @@ import (
"io"
"math"
"net/http"
"talk2robots/m/v2/app/ai/claude"
"talk2robots/m/v2/app/ai/sse"
"talk2robots/m/v2/app/config"
"talk2robots/m/v2/app/models"
@@ -40,9 +41,22 @@ const (
CHAT_GPT4O_INPUT_PRICE = 5.0 / 1000000
CHAT_GPT4O_OUTPUT_PRICE = 15.0 / 1000000

// fireworks
FIREWORKS_16B_80B_PRICE = 0.9 / 1000000
FIREWORKS_0B_16B_PRICE = 0.2 / 1000000

// claude haiku 3.0
HAIKU_INPUT_PRICE = 0.25 / 1000000
HAIKU_OUTPUT_PRICE = 1.25 / 1000000

	// claude sonnet 3.5
SONET_INPUT_PRICE = 3.0 / 1000000
SONET_OUTPUT_PRICE = 15.0 / 1000000

// claude opus 3.0
OPUS_INPUT_PRICE = 15.0 / 1000000
OPUS_OUTPUT_PRICE = 75.0 / 1000000

CHARS_PER_TOKEN = 2.0 // average number of characters per token, must be tuned or moved to tiktoken
)
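To make the arithmetic concrete: under these constants, a Sonnet 3.5 call with 1,000 input and 500 output tokens costs 1000 × 3.0/10^6 + 500 × 15.0/10^6 = $0.0105. Below is a hedged sketch (hypothetical helpers, not in this commit) of how the constants are meant to be consumed; `PricePerInputToken`/`PricePerOutputToken` are defined later in this file:

```go
// estimateTokens roughly converts text length to tokens via CHARS_PER_TOKEN.
func estimateTokens(text string) float64 {
	return float64(len(text)) / CHARS_PER_TOKEN
}

// estimateCost turns token counts into dollars for a given engine.
func estimateCost(model models.Engine, inputTokens, outputTokens float64) float64 {
	return inputTokens*PricePerInputToken(model) + outputTokens*PricePerOutputToken(model)
}
```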

@@ -52,6 +66,11 @@ func (a *API) ChatComplete(ctx context.Context, completion models.ChatCompletion
if completion.Model == "" {
completion.Model = string(models.ChatGpt35Turbo)
}

if IsClaudeAI(models.Engine(completion.Model)) {
return a.chatCompleteClaude(ctx, completion)
}

promptTokens := 0.0
for _, message := range completion.Messages {
promptTokens += 4 + ApproximateTokensCount(message.Content)
Expand Down Expand Up @@ -121,9 +140,16 @@ func (a *API) ChatCompleteStreaming(ctx context.Context, completion models.ChatM
if completion.Model == "" {
completion.Model = string(models.ChatGpt35Turbo)
}

if IsClaudeAI(models.Engine(completion.Model)) {
return chatCompleteStreamingClaude(ctx, completion, cancelContext)
}

promptTokens := 0.0
for _, message := range completion.Messages {
-		promptTokens += 4 + ApproximateTokensCount(message.Content[0].Text)
+		for _, content := range message.Content {
+			promptTokens += 4 + ApproximateTokensCount(content.Text)
+		}
}
if completion.MaxTokens == 0 {
// calculate max tokens based on prompt words count
@@ -200,25 +226,24 @@ }
}

 func urlFromModel(model models.Engine) string {
-	switch IsFireworksAI(model) {
-	case true:
+	if IsFireworksAI(model) {
 		return "https://api.fireworks.ai/inference/v1/chat/completions"
-	case false:
-		return "https://api.openai.com/v1/chat/completions"
 	}
-
-	return ""
+	if IsClaudeAI(model) {
+		return "https://api.anthropic.com/v1/messages"
+	}
+	return "https://api.openai.com/v1/chat/completions"
 }

 func authTokenFromModel(model models.Engine) string {
-	switch IsFireworksAI(model) {
-	case true:
+	if IsFireworksAI(model) {
 		return config.CONFIG.FireworksAPIKey
-	case false:
-		return config.CONFIG.OpenAIAPIKey
 	}
+	if IsClaudeAI(model) {
+		return config.CONFIG.ClaudeAPIKey
+	}
-
-	return ""
+	return config.CONFIG.OpenAIAPIKey
 }
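A detail worth flagging: OpenAI and Fireworks take the key as a standard `Authorization: Bearer` header, while the Anthropic endpoint expects `x-api-key` plus an `anthropic-version` header, which is why the Claude functions below set their own headers. A hypothetical consolidation sketch (not in this commit):

```go
// setAuthHeaders is a hypothetical helper mirroring the headers
// chatCompleteClaude sets further down in this file.
func setAuthHeaders(req *http.Request, model models.Engine) {
	token := authTokenFromModel(model)
	if IsClaudeAI(model) {
		req.Header.Set("x-api-key", token)
		req.Header.Set("anthropic-version", "2023-06-01")
		return
	}
	req.Header.Set("Authorization", "Bearer "+token)
}
```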

// if this snippet makes too many mistakes, we can use this
@@ -239,6 +264,10 @@ func PricePerInputToken(model models.Engine) float64 {
return FIREWORKS_16B_80B_PRICE
case models.ChatGpt4o:
return CHAT_GPT4O_INPUT_PRICE
	case models.Sonet35:
		return SONET_INPUT_PRICE
	case models.Haiku3:
		return HAIKU_INPUT_PRICE
	case models.Opus3:
		return OPUS_INPUT_PRICE
default:
return CHAT_INPUT_PRICE
}
@@ -256,6 +285,10 @@ func PricePerOutputToken(model models.Engine) float64 {
return FIREWORKS_16B_80B_PRICE
case models.ChatGpt4o:
return CHAT_GPT4O_OUTPUT_PRICE
	case models.Sonet35:
		return SONET_OUTPUT_PRICE
	case models.Haiku3:
		return HAIKU_OUTPUT_PRICE
	case models.Opus3:
		return OPUS_OUTPUT_PRICE
default:
return CHAT_OUTPUT_PRICE
}
@@ -272,6 +305,8 @@ func LimitPromptTokensForModel(model models.Engine, promptTokensCount float64) i
return int(math.Min(15*1024, promptTokensCount))
case models.LlamaV3_70b, models.LlamaV3_8b:
return int(math.Min(7*1024, promptTokensCount))
case models.Sonet35, models.Haiku3, models.Opus3:
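		// Claude 3 models advertise a 200K-token context window; capping at 199*1024 leaves headroom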
return int(math.Min(199*1024, promptTokensCount))
default:
return int(math.Min(3*1024, promptTokensCount))
}
@@ -286,3 +321,183 @@ func maxTokensForModel(model models.Engine, promptTokensCount float64) float64 {
return 2000
}
}

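// chatCompleteClaude issues a non-streaming request against the Anthropic
// Messages API, records usage for billing, and returns the first content
// block's text.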
func (a *API) chatCompleteClaude(ctx context.Context, completion models.ChatCompletion) (string, error) {
timeNow := time.Now()
promptTokens := 0.0
completion, systemPrompt := claude.Convert(completion)
for _, message := range completion.Messages {
promptTokens += 4 + ApproximateTokensCount(message.Content)
}
if completion.MaxTokens == 0 {
// calculate max tokens based on prompt words count
completion.MaxTokens = int(maxTokensForModel(models.Engine(completion.Model), promptTokens))
}

usage := models.CostAndUsage{
Engine: models.Engine(completion.Model),
PricePerInputUnit: PricePerInputToken(models.Engine(completion.Model)),
PricePerOutputUnit: PricePerOutputToken(models.Engine(completion.Model)),
Cost: 0,
Usage: models.Usage{},
}

data := map[string]interface{}{
"max_tokens": completion.MaxTokens,
"messages": completion.Messages,
"model": completion.Model,
"system": systemPrompt,
}

body, err := json.Marshal(data)
if err != nil {
return "", err
}

url := urlFromModel(models.Engine(completion.Model))
req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewBuffer(body))
if err != nil {
return "", err
}
req.Header.Set("Content-Type", "application/json")
req.Header.Set("x-api-key", authTokenFromModel(models.Engine(completion.Model)))
req.Header.Set("anthropic-version", "2023-06-01")

	status := fmt.Sprintf("status:%d", 0)
	resp, err := a.client.Do(req)
	if err != nil {
		return "", err
	}
	// capture the real HTTP status for the deferred metrics below
	status = fmt.Sprintf("status:%d", resp.StatusCode)
	defer resp.Body.Close()
defer func() {
config.CONFIG.DataDogClient.Timing("ai.chat_complete.latency", time.Since(timeNow), []string{status, "model:" + completion.Model}, 1)
config.CONFIG.DataDogClient.Timing("ai.chat_complete.latency_per_token", time.Since(timeNow), []string{status, "model:" + completion.Model}, float64(usage.Usage.CompletionTokens))
}()

if resp.StatusCode != http.StatusOK {
return "", errors.New("ChatComplete: " + resp.Status)
}

var response models.ClaudeChatResponse
if err := json.NewDecoder(resp.Body).Decode(&response); err != nil {
if err == io.EOF {
return "", errors.New("ChatComplete: empty response")
}
return "", err
}
	usage.Usage.PromptTokens = response.Usage.InputTokens
	usage.Usage.CompletionTokens = response.Usage.OutputTokens

	go payments.Bill(ctx, usage)
	// guard against an empty or non-text first content block before dereferencing
	if len(response.Content) == 0 || response.Content[0].Text == nil {
		return "", errors.New("ChatComplete: no text content in response")
	}
	return *response.Content[0].Text, nil
}

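// chatCompleteStreamingClaude streams a Messages API response over SSE,
// forwarding text deltas to the returned channel and accumulating token
// usage from message_start/message_delta events for billing.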
func chatCompleteStreamingClaude(ctx context.Context, completion models.ChatMultimodalCompletion, cancelContext context.CancelFunc) (chan string, error) {
timeNow := time.Now()
	promptTokens := 0.0
	completion, systemPrompt := claude.ConvertMultimodal(completion)
	// approximate prompt tokens across all content parts so maxTokensForModel scales with the input
	for _, message := range completion.Messages {
		for _, content := range message.Content {
			promptTokens += 4 + ApproximateTokensCount(content.Text)
		}
	}
	if completion.MaxTokens == 0 {
		// calculate max tokens based on prompt words count
		completion.MaxTokens = int(maxTokensForModel(models.Engine(completion.Model), promptTokens))
	}

usage := models.CostAndUsage{
Engine: models.Engine(completion.Model),
PricePerInputUnit: PricePerInputToken(models.Engine(completion.Model)),
PricePerOutputUnit: PricePerOutputToken(models.Engine(completion.Model)),
Cost: 0,
Usage: models.Usage{},
}

data := map[string]interface{}{
"max_tokens": completion.MaxTokens,
"messages": completion.Messages,
"model": completion.Model,
"system": systemPrompt,
"stream": true,
}

body, err := json.Marshal(data)
if err != nil {
return nil, err
}

url := urlFromModel(models.Engine(completion.Model))
req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewBuffer(body))
if err != nil {
return nil, err
}
req.Header.Set("Content-Type", "application/json")
req.Header.Set("x-api-key", authTokenFromModel(models.Engine(completion.Model)))
req.Header.Set("anthropic-version", "2023-06-01")

messages := make(chan string)

go func() {
defer func() {
close(messages)
cancelContext()

usage.Usage.TotalTokens = usage.Usage.PromptTokens + usage.Usage.CompletionTokens
go payments.Bill(ctx, usage)
config.CONFIG.DataDogClient.Timing("ai.chat_complete_streaming.latency", time.Since(timeNow), []string{"model:" + completion.Model}, 1)
config.CONFIG.DataDogClient.Timing("ai.chat_complete_streaming.latency_per_token", time.Since(timeNow), []string{"model:" + completion.Model}, float64(usage.Usage.CompletionTokens))
}()

// event: message_start
// data: {"type": "message_start", "message": {"id": "msg_1nZdL29xx5MUA1yADyHTEsnR8uuvGzszyY", "type": "message", "role": "assistant", "content": [], "model": "claude-3-5-sonnet-20240620", "stop_reason": null, "stop_sequence": null, "usage": {"input_tokens": 25, "output_tokens": 1}}}

// event: content_block_start
// data: {"type": "content_block_start", "index": 0, "content_block": {"type": "text", "text": ""}}

// event: ping
// data: {"type": "ping"}

// event: content_block_delta
// data: {"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": "Hello"}}

// event: content_block_delta
// data: {"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": "!"}}

// event: content_block_stop
// data: {"type": "content_block_stop", "index": 0}

// event: message_delta
// data: {"type": "message_delta", "delta": {"stop_reason": "end_turn", "stop_sequence":null}, "usage": {"output_tokens": 15}}

// event: message_stop
// data: {"type": "message_stop"}
client := sse.NewClientFromReq(req)
err := client.SubscribeWithContext(ctx, "", func(msg *sse.Event) {
			var response models.ClaudeStreamEvent
			if err := json.Unmarshal(msg.Data, &response); err != nil {
				log.Errorf("ChatCompleteStreamingClaude couldn't parse response: %s, err: %v", string(msg.Data), err)
				return
			}
			// skip events without a type rather than dereferencing a nil pointer below
			if response.Type == nil {
				return
			}
log.Debugf("ChatCompleteStreamingClaude got event: %s", string(msg.Data))

if *response.Type == "message_start" && response.Message.Usage != nil {
currentUsage := response.Message.Usage
log.Debugf("ChatCompleteStreamingClaude got message_start, input_tokens: %d, output_tokens: %d", currentUsage.InputTokens, currentUsage.OutputTokens)
usage.Usage.PromptTokens += currentUsage.InputTokens
usage.Usage.CompletionTokens += currentUsage.OutputTokens
log.Debugf("ChatCompleteStreamingClaude usage: %+v", usage.Usage)
}

if *response.Type == "message_delta" && response.Usage != nil {
currentUsage := response.Usage
log.Debugf("ChatCompleteStreamingClaude got message_delta, output_tokens: %d", currentUsage.OutputTokens)
usage.Usage.CompletionTokens += currentUsage.OutputTokens
log.Debugf("ChatCompleteStreamingClaude usage: %+v", usage.Usage)
}

if *response.Type == "content_block_delta" && response.Delta != nil && response.Delta.Text != nil {
				messages <- *response.Delta.Text
}
})
if err != nil {
log.Errorf("ChatCompleteStreamingClaude couldn't subscribe: %v", err)
}
}()
return messages, nil
}
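Finally, a hedged call-site sketch (hypothetical; the field shapes of `models.ChatCompletion` beyond `Model`, `Messages`, and `MaxTokens`, and the `models.ChatMessage` name, are assumptions) showing that callers simply pass a Claude engine name and `ChatComplete` routes to the Anthropic path:

```go
reply, err := api.ChatComplete(ctx, models.ChatCompletion{
	Model: string(models.Sonet35),
	Messages: []models.ChatMessage{
		{Role: "user", Content: "Summarize this thread in one sentence."},
	},
})
if err != nil {
	log.Errorf("claude completion failed: %v", err)
	return
}
fmt.Println(reply)
```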