From 6fd80d41d5c31e256cd760b52cd0257586033eb2 Mon Sep 17 00:00:00 2001
From: "Joseph T. Lyons"
Date: Tue, 14 Nov 2023 08:09:52 -0500
Subject: [PATCH] Add gpt-4-1106-preview

---
 tiktoken-rs/src/model.rs     | 3 +++
 tiktoken-rs/src/tokenizer.rs | 5 +++++
 2 files changed, 8 insertions(+)

diff --git a/tiktoken-rs/src/model.rs b/tiktoken-rs/src/model.rs
index 84a3176..9c51eb1 100644
--- a/tiktoken-rs/src/model.rs
+++ b/tiktoken-rs/src/model.rs
@@ -32,6 +32,9 @@ macro_rules! starts_with_any {
 ///
 /// This function does not panic. It returns a default value of 4096 if the model is not recognized.
 pub fn get_context_size(model: &str) -> usize {
+    if starts_with_any!(model, "gpt-4-1106") {
+        return 128_000;
+    }
     if starts_with_any!(model, "gpt-4-32k") {
         return 32_768;
     }
diff --git a/tiktoken-rs/src/tokenizer.rs b/tiktoken-rs/src/tokenizer.rs
index eaf4306..012f1e8 100644
--- a/tiktoken-rs/src/tokenizer.rs
+++ b/tiktoken-rs/src/tokenizer.rs
@@ -35,6 +35,7 @@ const MODEL_PREFIX_TO_TOKENIZER: &[(&str, Tokenizer)] = &[
 
 const MODEL_TO_TOKENIZER: &[(&str, Tokenizer)] = &[
     // chat
+    ("gpt-4-1106-preview", Tokenizer::Cl100kBase),
     ("gpt-4-32k", Tokenizer::Cl100kBase),
     ("gpt-4", Tokenizer::Cl100kBase),
     ("gpt-3.5-turbo", Tokenizer::Cl100kBase),
@@ -130,6 +131,10 @@ mod tests {
     #[test]
     fn test_get_tokenizer() {
         assert_eq!(get_tokenizer("gpt-4-32k-0314"), Some(Tokenizer::Cl100kBase));
+        assert_eq!(
+            get_tokenizer("gpt-4-1106-preview"),
+            Some(Tokenizer::Cl100kBase)
+        );
         assert_eq!(get_tokenizer("gpt-3.5-turbo"), Some(Tokenizer::Cl100kBase));
         assert_eq!(
             get_tokenizer("gpt-3.5-turbo-0301"),