mega update - Claude AI + vision
radiantspace committed Jul 7, 2024
1 parent 45bdd92 commit 659ddf3
Showing 26 changed files with 1,304 additions and 158 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/deploy-to-doks.yaml
@@ -70,6 +70,7 @@ jobs:
BACKEND_BASE_URL: ${{ secrets.BACKEND_BASE_URL }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
CLAUDE_API_KEY: ${{ secrets.CLAUDE_API_KEY }}
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
SLACK_SIGNING_SECRET: ${{ secrets.SLACK_SIGNING_SECRET }}
STRIPE_ENDPOINT_SECRET: ${{ secrets.STRIPE_ENDPOINT_SECRET }}
@@ -83,6 +84,7 @@ jobs:
echo BACKEND_BASE_URL=$BACKEND_BASE_URL > environment-properties.env
echo OPENAI_API_KEY=$OPENAI_API_KEY >> environment-properties.env
echo FIREWORKS_API_KEY=$FIREWORKS_API_KEY >> environment-properties.env
echo CLAUDE_API_KEY=$CLAUDE_API_KEY >> environment-properties.env
echo SLACK_BOT_TOKEN=$SLACK_BOT_TOKEN >> environment-properties.env
echo SLACK_SIGNING_SECRET=$SLACK_SIGNING_SECRET >> environment-properties.env
echo STRIPE_ENDPOINT_SECRET=$STRIPE_ENDPOINT_SECRET >> environment-properties.env
13 changes: 8 additions & 5 deletions README.md
@@ -41,10 +41,10 @@ in Slack:
- [x] Video/Audio transcription
- [x] Video/Audio summary
- [x] Voice response (OpenAI TTS)
-- [x] Threads, i.e. context awareness and memory (OpenAI Assistant Threads)
-- [ ] Image recognition
-- [x] Image generation
+- [x] Threads, i.e. context awareness and memory (OpenAI Assistant Threads, MongoDB persistent threads)
+- [x] Image recognition
+- [x] Image generation (OpenAI DALL-E)
+- [ ] Document/PDF reading and reasoning

## Telegram Features

@@ -76,7 +76,7 @@ https://github.com/radiantspace/talk2robots/assets/8243681/3cfd47c4-55ed-4eb3-9f
## Slack Features

- [x] Chat in DM, private and public channels and/or threads
-- [x] Chat with GPT-3.5 `/chatgpt`. The bot remembers the context of the conversation until you say `/clear`.
+- [x] Chat `/chatgpt`. The bot remembers the context of the conversation until you say `/clear`.
- [x] Use `/grammar` mode to correct grammar only; you can also add :eyeglasses: emoji to a message to get a grammar correction reply
- [x] Summarize message thread by adding :memo: emoji to a message
- [ ] Use `/teacher` mode to correct and explain grammar
@@ -102,6 +102,8 @@ https://github.com/radiantspace/talk2robots/assets/8243681/c5805c11-716b-47ab-9f
- [x] Get user info `/user`
- [x] Get users count `/userscount`
- [x] Get users count for a subscription `/usersforsubscription`
- [x] `/sendmessagetoauser` to send a message to a user
- [x] `/sendmessagetousers` to send a message to users (e.g. to notify about maintenance or marketing)


## 🏗️ Architecture overview
@@ -135,6 +137,7 @@ graph LR
A[Backend] <--> D[OpenAI API]
A[Backend] <--> E[FireworksAI API]
A[Backend] <--> I[ClaudeAI API]
A[Backend] -.-> P[DataDog]
A[Backend] <-.-> N[Stripe]
8 changes: 8 additions & 0 deletions backend/app/ai/ai.go
@@ -59,3 +59,11 @@ func IsFireworksAI(model models.Engine) bool {

return false
}

func IsClaudeAI(model models.Engine) bool {
if model == models.Sonet35 || model == models.Haiku3 || model == models.Opus3 {
return true
}

return false
}
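For orientation, here is a hedged sketch (not part of this commit) of how these predicates partition the engine space; `providerFor` is a hypothetical helper, while `IsFireworksAI`/`IsClaudeAI` are the functions above:

```go
// providerFor is a hypothetical helper illustrating the routing the
// predicates above enable; anything unmatched falls through to OpenAI.
func providerFor(model models.Engine) string {
	switch {
	case IsFireworksAI(model):
		return "fireworks"
	case IsClaudeAI(model):
		return "anthropic"
	default:
		return "openai"
	}
}
```

`ChatComplete` and `ChatCompleteStreaming` in `chat.go` below apply exactly this pattern, branching to the Claude path before the OpenAI-compatible one.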
239 changes: 227 additions & 12 deletions backend/app/ai/chat.go
@@ -11,6 +11,7 @@ import (
"io"
"math"
"net/http"
"talk2robots/m/v2/app/ai/claude"
"talk2robots/m/v2/app/ai/sse"
"talk2robots/m/v2/app/config"
"talk2robots/m/v2/app/models"
@@ -40,9 +41,22 @@ const (
CHAT_GPT4O_INPUT_PRICE = 5.0 / 1000000
CHAT_GPT4O_OUTPUT_PRICE = 15.0 / 1000000

// fireworks
FIREWORKS_16B_80B_PRICE = 0.9 / 1000000
FIREWORKS_0B_16B_PRICE = 0.2 / 1000000

// claude haiku 3.0
HAIKU_INPUT_PRICE = 0.25 / 1000000
HAIKU_OUTPUT_PRICE = 1.25 / 1000000

	// claude sonnet 3.5
SONET_INPUT_PRICE = 3.0 / 1000000
SONET_OUTPUT_PRICE = 15.0 / 1000000

// claude opus 3.0
OPUS_INPUT_PRICE = 15.0 / 1000000
OPUS_OUTPUT_PRICE = 75.0 / 1000000

CHARS_PER_TOKEN = 2.0 // average number of characters per token, must be tuned or moved to tiktoken
)
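To make the arithmetic concrete: under these constants, a Sonnet 3.5 call with 1,000 input and 500 output tokens costs 1000 × 3.0/10^6 + 500 × 15.0/10^6 = $0.0105. Below is a hedged sketch (hypothetical helpers, not in this commit) of how the constants are meant to be consumed; `PricePerInputToken`/`PricePerOutputToken` are defined later in this file:

```go
// estimateTokens roughly converts text length to tokens via CHARS_PER_TOKEN.
func estimateTokens(text string) float64 {
	return float64(len(text)) / CHARS_PER_TOKEN
}

// estimateCost turns token counts into dollars for a given engine.
func estimateCost(model models.Engine, inputTokens, outputTokens float64) float64 {
	return inputTokens*PricePerInputToken(model) + outputTokens*PricePerOutputToken(model)
}
```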

@@ -52,6 +66,11 @@ func (a *API) ChatComplete(ctx context.Context, completion models.ChatCompletion
if completion.Model == "" {
completion.Model = string(models.ChatGpt35Turbo)
}

if IsClaudeAI(models.Engine(completion.Model)) {
return a.chatCompleteClaude(ctx, completion)
}

promptTokens := 0.0
for _, message := range completion.Messages {
promptTokens += 4 + ApproximateTokensCount(message.Content)
Expand Down Expand Up @@ -121,9 +140,16 @@ func (a *API) ChatCompleteStreaming(ctx context.Context, completion models.ChatM
if completion.Model == "" {
completion.Model = string(models.ChatGpt35Turbo)
}

if IsClaudeAI(models.Engine(completion.Model)) {
return chatCompleteStreamingClaude(ctx, completion, cancelContext)
}

promptTokens := 0.0
for _, message := range completion.Messages {
-		promptTokens += 4 + ApproximateTokensCount(message.Content[0].Text)
+		for _, content := range message.Content {
+			promptTokens += 4 + ApproximateTokensCount(content.Text)
+		}
}
if completion.MaxTokens == 0 {
// calculate max tokens based on prompt words count
@@ -200,25 +226,24 @@ }
}

 func urlFromModel(model models.Engine) string {
-	switch IsFireworksAI(model) {
-	case true:
+	if IsFireworksAI(model) {
 		return "https://api.fireworks.ai/inference/v1/chat/completions"
-	case false:
-		return "https://api.openai.com/v1/chat/completions"
 	}
-
-	return ""
+	if IsClaudeAI(model) {
+		return "https://api.anthropic.com/v1/messages"
+	}
+	return "https://api.openai.com/v1/chat/completions"
 }

 func authTokenFromModel(model models.Engine) string {
-	switch IsFireworksAI(model) {
-	case true:
+	if IsFireworksAI(model) {
 		return config.CONFIG.FireworksAPIKey
-	case false:
-		return config.CONFIG.OpenAIAPIKey
 	}
+	if IsClaudeAI(model) {
+		return config.CONFIG.ClaudeAPIKey
+	}
-
-	return ""
+	return config.CONFIG.OpenAIAPIKey
 }
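A detail worth flagging: OpenAI and Fireworks take the key as a standard `Authorization: Bearer` header, while the Anthropic endpoint expects `x-api-key` plus an `anthropic-version` header, which is why the Claude functions below set their own headers. A hypothetical consolidation sketch (not in this commit):

```go
// setAuthHeaders is a hypothetical helper mirroring the headers
// chatCompleteClaude sets further down in this file.
func setAuthHeaders(req *http.Request, model models.Engine) {
	token := authTokenFromModel(model)
	if IsClaudeAI(model) {
		req.Header.Set("x-api-key", token)
		req.Header.Set("anthropic-version", "2023-06-01")
		return
	}
	req.Header.Set("Authorization", "Bearer "+token)
}
```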

// if this snippet makes too many mistakes, we can use this
@@ -239,6 +264,10 @@ func PricePerInputToken(model models.Engine) float64 {
return FIREWORKS_16B_80B_PRICE
case models.ChatGpt4o:
return CHAT_GPT4O_INPUT_PRICE
	case models.Sonet35:
		return SONET_INPUT_PRICE
	case models.Haiku3:
		return HAIKU_INPUT_PRICE
	case models.Opus3:
		return OPUS_INPUT_PRICE
default:
return CHAT_INPUT_PRICE
}
@@ -256,6 +285,10 @@ func PricePerOutputToken(model models.Engine) float64 {
return FIREWORKS_16B_80B_PRICE
case models.ChatGpt4o:
return CHAT_GPT4O_OUTPUT_PRICE
	case models.Sonet35:
		return SONET_OUTPUT_PRICE
	case models.Haiku3:
		return HAIKU_OUTPUT_PRICE
	case models.Opus3:
		return OPUS_OUTPUT_PRICE
default:
return CHAT_OUTPUT_PRICE
}
@@ -272,6 +305,8 @@ func LimitPromptTokensForModel(model models.Engine, promptTokensCount float64) i
return int(math.Min(15*1024, promptTokensCount))
case models.LlamaV3_70b, models.LlamaV3_8b:
return int(math.Min(7*1024, promptTokensCount))
case models.Sonet35, models.Haiku3, models.Opus3:
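		// Claude 3 models advertise a 200K-token context window; capping at 199*1024 leaves headroom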
return int(math.Min(199*1024, promptTokensCount))
default:
return int(math.Min(3*1024, promptTokensCount))
}
@@ -286,3 +321,183 @@ func maxTokensForModel(model models.Engine, promptTokensCount float64) float64 {
return 2000
}
}

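// chatCompleteClaude issues a non-streaming request against the Anthropic
// Messages API, records usage for billing, and returns the first content
// block's text.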
func (a *API) chatCompleteClaude(ctx context.Context, completion models.ChatCompletion) (string, error) {
timeNow := time.Now()
promptTokens := 0.0
completion, systemPrompt := claude.Convert(completion)
for _, message := range completion.Messages {
promptTokens += 4 + ApproximateTokensCount(message.Content)
}
if completion.MaxTokens == 0 {
// calculate max tokens based on prompt words count
completion.MaxTokens = int(maxTokensForModel(models.Engine(completion.Model), promptTokens))
}

usage := models.CostAndUsage{
Engine: models.Engine(completion.Model),
PricePerInputUnit: PricePerInputToken(models.Engine(completion.Model)),
PricePerOutputUnit: PricePerOutputToken(models.Engine(completion.Model)),
Cost: 0,
Usage: models.Usage{},
}

data := map[string]interface{}{
"max_tokens": completion.MaxTokens,
"messages": completion.Messages,
"model": completion.Model,
"system": systemPrompt,
}

body, err := json.Marshal(data)
if err != nil {
return "", err
}

url := urlFromModel(models.Engine(completion.Model))
req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewBuffer(body))
if err != nil {
return "", err
}
req.Header.Set("Content-Type", "application/json")
req.Header.Set("x-api-key", authTokenFromModel(models.Engine(completion.Model)))
req.Header.Set("anthropic-version", "2023-06-01")

	status := fmt.Sprintf("status:%d", 0)
	resp, err := a.client.Do(req)
	if err != nil {
		return "", err
	}
	// capture the real HTTP status for the deferred metrics below
	status = fmt.Sprintf("status:%d", resp.StatusCode)
	defer resp.Body.Close()
defer func() {
config.CONFIG.DataDogClient.Timing("ai.chat_complete.latency", time.Since(timeNow), []string{status, "model:" + completion.Model}, 1)
config.CONFIG.DataDogClient.Timing("ai.chat_complete.latency_per_token", time.Since(timeNow), []string{status, "model:" + completion.Model}, float64(usage.Usage.CompletionTokens))
}()

if resp.StatusCode != http.StatusOK {
return "", errors.New("ChatComplete: " + resp.Status)
}

var response models.ClaudeChatResponse
if err := json.NewDecoder(resp.Body).Decode(&response); err != nil {
if err == io.EOF {
return "", errors.New("ChatComplete: empty response")
}
return "", err
}
	usage.Usage.PromptTokens = response.Usage.InputTokens
	usage.Usage.CompletionTokens = response.Usage.OutputTokens

	go payments.Bill(ctx, usage)
	// guard against an empty or non-text first content block before dereferencing
	if len(response.Content) == 0 || response.Content[0].Text == nil {
		return "", errors.New("ChatComplete: no text content in response")
	}
	return *response.Content[0].Text, nil
}

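// chatCompleteStreamingClaude streams a Messages API response over SSE,
// forwarding text deltas to the returned channel and accumulating token
// usage from message_start/message_delta events for billing.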
func chatCompleteStreamingClaude(ctx context.Context, completion models.ChatMultimodalCompletion, cancelContext context.CancelFunc) (chan string, error) {
timeNow := time.Now()
	promptTokens := 0.0
	completion, systemPrompt := claude.ConvertMultimodal(completion)
	// approximate prompt tokens across all content parts so maxTokensForModel scales with the input
	for _, message := range completion.Messages {
		for _, content := range message.Content {
			promptTokens += 4 + ApproximateTokensCount(content.Text)
		}
	}
	if completion.MaxTokens == 0 {
		// calculate max tokens based on prompt words count
		completion.MaxTokens = int(maxTokensForModel(models.Engine(completion.Model), promptTokens))
	}

usage := models.CostAndUsage{
Engine: models.Engine(completion.Model),
PricePerInputUnit: PricePerInputToken(models.Engine(completion.Model)),
PricePerOutputUnit: PricePerOutputToken(models.Engine(completion.Model)),
Cost: 0,
Usage: models.Usage{},
}

data := map[string]interface{}{
"max_tokens": completion.MaxTokens,
"messages": completion.Messages,
"model": completion.Model,
"system": systemPrompt,
"stream": true,
}

body, err := json.Marshal(data)
if err != nil {
return nil, err
}

url := urlFromModel(models.Engine(completion.Model))
req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewBuffer(body))
if err != nil {
return nil, err
}
req.Header.Set("Content-Type", "application/json")
req.Header.Set("x-api-key", authTokenFromModel(models.Engine(completion.Model)))
req.Header.Set("anthropic-version", "2023-06-01")

messages := make(chan string)

go func() {
defer func() {
close(messages)
cancelContext()

usage.Usage.TotalTokens = usage.Usage.PromptTokens + usage.Usage.CompletionTokens
go payments.Bill(ctx, usage)
config.CONFIG.DataDogClient.Timing("ai.chat_complete_streaming.latency", time.Since(timeNow), []string{"model:" + completion.Model}, 1)
config.CONFIG.DataDogClient.Timing("ai.chat_complete_streaming.latency_per_token", time.Since(timeNow), []string{"model:" + completion.Model}, float64(usage.Usage.CompletionTokens))
}()

// event: message_start
// data: {"type": "message_start", "message": {"id": "msg_1nZdL29xx5MUA1yADyHTEsnR8uuvGzszyY", "type": "message", "role": "assistant", "content": [], "model": "claude-3-5-sonnet-20240620", "stop_reason": null, "stop_sequence": null, "usage": {"input_tokens": 25, "output_tokens": 1}}}

// event: content_block_start
// data: {"type": "content_block_start", "index": 0, "content_block": {"type": "text", "text": ""}}

// event: ping
// data: {"type": "ping"}

// event: content_block_delta
// data: {"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": "Hello"}}

// event: content_block_delta
// data: {"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": "!"}}

// event: content_block_stop
// data: {"type": "content_block_stop", "index": 0}

// event: message_delta
// data: {"type": "message_delta", "delta": {"stop_reason": "end_turn", "stop_sequence":null}, "usage": {"output_tokens": 15}}

// event: message_stop
// data: {"type": "message_stop"}
client := sse.NewClientFromReq(req)
err := client.SubscribeWithContext(ctx, "", func(msg *sse.Event) {
			var response models.ClaudeStreamEvent
			if err := json.Unmarshal(msg.Data, &response); err != nil {
				log.Errorf("ChatCompleteStreamingClaude couldn't parse response: %s, err: %v", string(msg.Data), err)
				return
			}
			// skip events without a type rather than dereferencing a nil pointer below
			if response.Type == nil {
				return
			}
log.Debugf("ChatCompleteStreamingClaude got event: %s", string(msg.Data))

if *response.Type == "message_start" && response.Message.Usage != nil {
currentUsage := response.Message.Usage
log.Debugf("ChatCompleteStreamingClaude got message_start, input_tokens: %d, output_tokens: %d", currentUsage.InputTokens, currentUsage.OutputTokens)
usage.Usage.PromptTokens += currentUsage.InputTokens
usage.Usage.CompletionTokens += currentUsage.OutputTokens
log.Debugf("ChatCompleteStreamingClaude usage: %+v", usage.Usage)
}

if *response.Type == "message_delta" && response.Usage != nil {
currentUsage := response.Usage
log.Debugf("ChatCompleteStreamingClaude got message_delta, output_tokens: %d", currentUsage.OutputTokens)
usage.Usage.CompletionTokens += currentUsage.OutputTokens
log.Debugf("ChatCompleteStreamingClaude usage: %+v", usage.Usage)
}

if *response.Type == "content_block_delta" && response.Delta != nil && response.Delta.Text != nil {
				messages <- *response.Delta.Text
}
})
if err != nil {
log.Errorf("ChatCompleteStreamingClaude couldn't subscribe: %v", err)
}
}()
return messages, nil
}
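Finally, a hedged call-site sketch (hypothetical; the field shapes of `models.ChatCompletion` beyond `Model`, `Messages`, and `MaxTokens`, and the `models.ChatMessage` name, are assumptions) showing that callers simply pass a Claude engine name and `ChatComplete` routes to the Anthropic path:

```go
reply, err := api.ChatComplete(ctx, models.ChatCompletion{
	Model: string(models.Sonet35),
	Messages: []models.ChatMessage{
		{Role: "user", Content: "Summarize this thread in one sentence."},
	},
})
if err != nil {
	log.Errorf("claude completion failed: %v", err)
	return
}
fmt.Println(reply)
```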