Skip to content

Commit

Permalink
Merge pull request #112 from mbarretol/add-gpt4o-mini
Browse files Browse the repository at this point in the history
Add support for gpt-4o, gpt-4o-mini, text-embedding-3-small and text-embedding-3-large
  • Loading branch information
dqbd authored Aug 15, 2024
2 parents a7cce99 + 831a7e8 commit e77e339
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 4 deletions.
7 changes: 7 additions & 0 deletions .changeset/five-queens-tan.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
"tiktoken": patch
"js-tiktoken": patch
"@dqbd/tiktoken": patch
---

Add support for gpt-4o, gpt-4o-mini, text-embedding-3-small and text-embedding-3-large
9 changes: 7 additions & 2 deletions js/src/core.ts
Original file line number Diff line number Diff line change
Expand Up @@ -271,11 +271,16 @@ export function getEncodingNameForModel(model: TiktokenModel) {
case "gpt-4-turbo-2024-04-09":
case "gpt-4-turbo-preview":
case "gpt-4-0125-preview":
case "text-embedding-ada-002": {
case "text-embedding-ada-002":
case "text-embedding-3-small":
case "text-embedding-3-large": {
return "cl100k_base";
}
case "gpt-4o":
case "gpt-4o-2024-05-13": {
case "gpt-4o-2024-05-13":
case "gpt-4o-2024-08-06":
case "gpt-4o-mini-2024-07-18":
case "gpt-4o-mini": {
return "o200k_base";
}
default:
Expand Down
7 changes: 6 additions & 1 deletion tiktoken/model_to_encoding.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
"text-davinci-edit-001": "p50k_edit",
"code-davinci-edit-001": "p50k_edit",
"text-embedding-ada-002": "cl100k_base",
"text-embedding-3-small": "cl100k_base",
"text-embedding-3-large": "cl100k_base",
"text-similarity-davinci-001": "r50k_base",
"text-similarity-curie-001": "r50k_base",
"text-similarity-babbage-001": "r50k_base",
Expand Down Expand Up @@ -54,5 +56,8 @@
"gpt-4-0125-preview": "cl100k_base",
"gpt-4-vision-preview": "cl100k_base",
"gpt-4o": "o200k_base",
"gpt-4o-2024-05-13": "o200k_base"
"gpt-4o-2024-05-13": "o200k_base",
"gpt-4o-2024-08-06": "o200k_base",
"gpt-4o-mini-2024-07-18": "o200k_base",
"gpt-4o-mini": "o200k_base"
}
9 changes: 8 additions & 1 deletion wasm/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -420,6 +420,9 @@ export type TiktokenModel =
| "gpt-4-vision-preview"
| "gpt-4o"
| "gpt-4o-2024-05-13"
| "gpt-4o-2024-08-06"
| "gpt-4o-mini-2024-07-18"
| "gpt-4o-mini"
/**
* @param {TiktokenModel} model
Expand All @@ -436,7 +439,6 @@ pub fn encoding_for_model(
extend_special_tokens: JsValue,
) -> Result<Tiktoken, JsError> {
let encoding = match model {
"davinci" => Ok("p50k_base"),
"text-davinci-003" => Ok("p50k_base"),
"text-davinci-002" => Ok("p50k_base"),
"text-davinci-001" => Ok("r50k_base"),
Expand All @@ -457,6 +459,8 @@ pub fn encoding_for_model(
"text-davinci-edit-001" => Ok("p50k_edit"),
"code-davinci-edit-001" => Ok("p50k_edit"),
"text-embedding-ada-002" => Ok("cl100k_base"),
"text-embedding-3-small" => Ok("cl100k_base"),
"text-embedding-3-large" => Ok("cl100k_base"),
"text-similarity-davinci-001" => Ok("r50k_base"),
"text-similarity-curie-001" => Ok("r50k_base"),
"text-similarity-babbage-001" => Ok("r50k_base"),
Expand Down Expand Up @@ -492,6 +496,9 @@ pub fn encoding_for_model(
"gpt-4-0125-preview" => Ok("cl100k_base"),
"gpt-4o" => Ok("o200k_base"),
"gpt-4o-2024-05-13" => Ok("o200k_base"),
"gpt-4o-2024-08-06" => Ok("o200k_base"),
"gpt-4o-mini-2024-07-18" => Ok("o200k_base"),
"gpt-4o-mini" => Ok("o200k_base"),
model => Err(JsError::new(
format!("Invalid model: {}", model.to_string()).as_str(),
)),
Expand Down

0 comments on commit e77e339

Please sign in to comment.