Skip to content

Commit

Permalink
Idefics2: sync added image tokens with transformers
Browse files Browse the repository at this point in the history
Before this change, the number of image tokens reserved by the router was not
the same as the number of image tokens added by transformers. Fixes #2029.
  • Loading branch information
danieldk committed Jun 18, 2024
1 parent 11ea9ce commit 54691e1
Show file tree
Hide file tree
Showing 10 changed files with 1,446 additions and 1,256 deletions.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -8,61 +8,61 @@
"tokens": [
{
"id": 330,
"logprob": -0.13000488,
"logprob": -0.08660889,
"special": false,
"text": " A"
},
{
"id": 13088,
"logprob": -0.6713867,
"logprob": -0.7089844,
"special": false,
"text": " chicken"
},
{
"id": 349,
"logprob": -0.2980957,
"logprob": -0.32885742,
"special": false,
"text": " is"
},
{
"id": 6398,
"logprob": -0.060638428,
"logprob": -0.05126953,
"special": false,
"text": " sitting"
},
{
"id": 356,
"logprob": -0.27319336,
"logprob": -0.35229492,
"special": false,
"text": " on"
},
{
"id": 264,
"logprob": -0.140625,
"logprob": -0.12561035,
"special": false,
"text": " a"
},
{
"id": 17972,
"logprob": -0.040405273,
"logprob": -0.038085938,
"special": false,
"text": " pile"
},
{
"id": 302,
"logprob": -0.0002708435,
"logprob": -0.00018656254,
"special": false,
"text": " of"
},
{
"id": 2445,
"logprob": -0.095336914,
"logprob": -0.07293701,
"special": false,
"text": " money"
},
{
"id": 28723,
"logprob": -0.0068359375,
"logprob": -0.004852295,
"special": false,
"text": "."
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,115 +8,115 @@
"tokens": [
{
"id": 415,
"logprob": -0.04421997,
"logprob": -0.039886475,
"special": false,
"text": " The"
},
{
"id": 12072,
"logprob": -0.13500977,
"logprob": -0.1430664,
"special": false,
"text": " cow"
},
{
"id": 349,
"logprob": -0.06750488,
"logprob": -0.056488037,
"special": false,
"text": " is"
},
{
"id": 6328,
"logprob": -0.6352539,
"logprob": -0.6855469,
"special": false,
"text": " standing"
},
{
"id": 356,
"logprob": -0.16186523,
"logprob": -0.1685791,
"special": false,
"text": " on"
},
{
"id": 272,
"logprob": -0.5078125,
"logprob": -0.50097656,
"special": false,
"text": " the"
},
{
"id": 10305,
"logprob": -0.017913818,
"logprob": -0.017303467,
"special": false,
"text": " beach"
},
{
"id": 304,
"logprob": -1.5205078,
"logprob": -1.3564453,
"special": false,
"text": " and"
},
{
"id": 272,
"logprob": -0.029174805,
"logprob": -0.017868042,
"special": false,
"text": " the"
},
{
"id": 13088,
"logprob": -0.003479004,
"logprob": -0.0027103424,
"special": false,
"text": " chicken"
},
{
"id": 349,
"logprob": -0.0035095215,
"logprob": -0.003156662,
"special": false,
"text": " is"
},
{
"id": 6398,
"logprob": -0.3088379,
"logprob": -0.37304688,
"special": false,
"text": " sitting"
},
{
"id": 356,
"logprob": -0.027755737,
"logprob": -0.034576416,
"special": false,
"text": " on"
},
{
"id": 264,
"logprob": -0.31884766,
"logprob": -0.29418945,
"special": false,
"text": " a"
},
{
"id": 17972,
"logprob": -0.047943115,
"logprob": -0.042877197,
"special": false,
"text": " pile"
},
{
"id": 302,
"logprob": -0.0002925396,
"logprob": -0.00028443336,
"special": false,
"text": " of"
},
{
"id": 2445,
"logprob": -0.02935791,
"logprob": -0.023223877,
"special": false,
"text": " money"
},
{
"id": 28723,
"logprob": -0.031219482,
"logprob": -0.018157959,
"special": false,
"text": "."
},
{
"id": 32002,
"logprob": -0.00034475327,
"logprob": -0.00018393993,
"special": true,
"text": "<end_of_utterance>"
},
Expand Down
2 changes: 1 addition & 1 deletion router/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ pub struct Idefics2 {}

impl Idefics2 {
pub fn get_number_of_features(&self, _height: usize, _width: usize) -> usize {
320
64
}
}

Expand Down
4 changes: 2 additions & 2 deletions router/src/infer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,11 @@ impl Infer {
validation: Validation,
max_concurrent_requests: usize,
tokenizer_config: HubTokenizerConfig,
processor_config: HubProcessorConfig,
processor_config: Option<HubProcessorConfig>,
) -> Self {
let chat_template = tokenizer_config
.chat_template
.or(processor_config.chat_template)
.or(processor_config.and_then(|p| p.chat_template))
.and_then(|t| match t {
ChatTemplateVersions::Single(template) => Some(template),
ChatTemplateVersions::Multiple(templates) => templates
Expand Down
1 change: 1 addition & 0 deletions router/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ impl HubTokenizerConfig {
#[derive(Debug, Clone, Deserialize, Default)]
pub struct HubProcessorConfig {
pub chat_template: Option<ChatTemplateVersions>,
pub do_image_splitting: Option<bool>,
pub image_seq_len: usize,
pub processor_class: Option<String>,
}
Expand Down
4 changes: 1 addition & 3 deletions router/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -295,9 +295,7 @@ async fn main() -> Result<(), RouterError> {
HubTokenizerConfig::default()
});

let processor_config = processor_config_filename
.and_then(HubProcessorConfig::from_file)
.unwrap_or_default();
let processor_config = processor_config_filename.and_then(HubProcessorConfig::from_file);

tracing::info!("Using config {config:?}");
if tokenizer.is_none() {
Expand Down
3 changes: 2 additions & 1 deletion router/src/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1421,7 +1421,7 @@ pub async fn run(
_ngrok_authtoken: Option<String>,
_ngrok_edge: Option<String>,
tokenizer_config: HubTokenizerConfig,
processor_config: HubProcessorConfig,
processor_config: Option<HubProcessorConfig>,
messages_api_enabled: bool,
grammar_support: bool,
max_client_batch_size: usize,
Expand Down Expand Up @@ -1634,6 +1634,7 @@ pub async fn run(
validation_workers,
tokenizer,
config,
processor_config.clone(),
max_best_of,
max_stop_sequences,
max_top_n_tokens,
Expand Down
Loading

0 comments on commit 54691e1

Please sign in to comment.