Progress bar and logging when loading repeating layers (#479)
EricLBuehler authored Jun 25, 2024
1 parent 615a10e commit 7ec3468
Showing 20 changed files with 93 additions and 48 deletions.
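
Most of the changed files apply the same pattern: the bare layer-index range in each model loader is wrapped in the new `NiceProgressBar` type (added in `mistralrs-core/src/utils/progress.rs` below), so iterating the range drives an indicatif progress bar while the repeating layers load. An illustrative before/after sketch of the pattern, not an excerpt from any single file:

```rust
// Before: a plain range gives no feedback while layers are constructed.
for layer_idx in 0..cfg.num_hidden_layers {
    // ... build layer `layer_idx` ...
}

// After: wrapping the range makes each iteration advance a progress bar.
for layer_idx in NiceProgressBar(0..cfg.num_hidden_layers, "Loading repeating layers") {
    // ... build layer `layer_idx` ...
}
```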
3 changes: 2 additions & 1 deletion mistralrs-core/src/models/gemma.rs
@@ -9,6 +9,7 @@ use crate::{
device_map::DeviceMapper,
layers::{repeat_kv, CausalMasker, MatMul, QLinear, ScaledDotProductAttention},
pipeline::{extract_logits, Cache, IsqModel, NormalLoadingMetadata, NormalModel},
utils::progress::NiceProgressBar,
};

fn default_max_position_embeddings() -> usize {
@@ -333,7 +334,7 @@ impl Model {
)?;
let mut layers = Vec::with_capacity(cfg.num_hidden_layers);
let vb_l = vb_m.pp("layers");
for layer_idx in 0..cfg.num_hidden_layers {
for layer_idx in NiceProgressBar(0..cfg.num_hidden_layers, "Loading repeating layers") {
let rotary_emb = Arc::new(RotaryEmbedding::new(
cfg.rope_theta as f32,
cfg.head_dim,
4 changes: 3 additions & 1 deletion mistralrs-core/src/models/llama.rs
@@ -11,6 +11,7 @@ use crate::{
device_map::DeviceMapper,
layers::{repeat_kv, CausalMasker, MatMul, RmsNorm, ScaledDotProductAttention},
pipeline::{extract_logits, IsqModel, NormalLoadingMetadata, NormalModel},
utils::progress::NiceProgressBar,
};

#[derive(Debug, Clone, Deserialize)]
@@ -316,7 +317,8 @@ impl Llama {
mapper.set_nm_device(vb.pp("model.norm"), false),
)?;
let head_dim = cfg.hidden_size / cfg.num_attention_heads;
let blocks: Vec<_> = (0..cfg.num_hidden_layers)
let blocks: Vec<_> = NiceProgressBar(0..cfg.num_hidden_layers, "Loading repeating layers")
.into_iter()
.map(|i| {
let rotary_emb = Arc::new(
RotaryEmbedding::new(
3 changes: 2 additions & 1 deletion mistralrs-core/src/models/mistral.rs
@@ -9,6 +9,7 @@ use crate::{
device_map::DeviceMapper,
layers::{repeat_kv, CausalMasker, MatMul, RmsNorm, ScaledDotProductAttention},
pipeline::{extract_logits, Cache, IsqModel, NormalLoadingMetadata, NormalModel},
utils::progress::NiceProgressBar,
};

#[derive(Debug, Clone, PartialEq)]
@@ -307,7 +308,7 @@ impl Model {
let head_dim = cfg.hidden_size / cfg.num_attention_heads;
let mut layers = Vec::with_capacity(cfg.num_hidden_layers);
let vb_l = vb_m.pp("layers");
for layer_idx in 0..cfg.num_hidden_layers {
for layer_idx in NiceProgressBar(0..cfg.num_hidden_layers, "Loading repeating layers") {
let rotary_emb = Arc::new(RotaryEmbedding::new(
cfg.rope_theta as f32,
head_dim,
3 changes: 2 additions & 1 deletion mistralrs-core/src/models/mixtral.rs
@@ -12,6 +12,7 @@ use crate::{
device_map::DeviceMapper,
layers::{repeat_kv, CausalMasker, MatMul, RmsNorm, ScaledDotProductAttention},
pipeline::{extract_logits, Cache, IsqModel, NormalLoadingMetadata, NormalModel},
utils::progress::NiceProgressBar,
};

/// https://github.com/huggingface/transformers/blob/1a585c1222a56bcaecc070966d558d4a9d862e83/src/transformers/models/mixtral/configuration_mixtral.py#L113
@@ -398,7 +399,7 @@ impl Model {
let head_dim = cfg.hidden_size / cfg.num_attention_heads;
let mut layers = Vec::with_capacity(cfg.num_hidden_layers);
let vb_l = vb_m.pp("layers");
for layer_idx in 0..cfg.num_hidden_layers {
for layer_idx in NiceProgressBar(0..cfg.num_hidden_layers, "Loading repeating layers") {
let rotary_emb = Arc::new(RotaryEmbedding::new(
cfg.rope_theta as f32,
head_dim,
3 changes: 2 additions & 1 deletion mistralrs-core/src/models/phi2.rs
@@ -15,6 +15,7 @@ use crate::{
device_map::DeviceMapper,
layers::{repeat_kv, CausalMasker, QLinear, ScaledDotProductAttention},
pipeline::{extract_logits, Cache, IsqModel, NormalLoadingMetadata, NormalModel},
utils::progress::NiceProgressBar,
};

// https://huggingface.co/microsoft/phi-2/blob/main/configuration_phi.py
@@ -306,7 +307,7 @@ impl Model {
)?;
let mut layers = Vec::with_capacity(cfg.num_hidden_layers);
let vb_m = vb_m.pp("layers");
for layer_idx in 0..cfg.num_hidden_layers {
for layer_idx in NiceProgressBar(0..cfg.num_hidden_layers, "Loading repeating layers") {
// Alternative rope scalings are not supported.
let rotary_emb = RotaryEmbedding::new_partial(
cfg.rope_theta,
3 changes: 2 additions & 1 deletion mistralrs-core/src/models/phi3.rs
@@ -15,6 +15,7 @@ use crate::{
pipeline::{
extract_logits, Cache, IsqModel, NormalLoadingMetadata, NormalModel, Phi3RopeScaling,
},
utils::progress::NiceProgressBar,
};

// https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/blob/main/config.json
@@ -306,7 +307,7 @@ impl Model {
)?;
let mut layers = Vec::with_capacity(cfg.num_hidden_layers);
let vb_l = vb_m.pp("layers");
for layer_idx in 0..cfg.num_hidden_layers {
for layer_idx in NiceProgressBar(0..cfg.num_hidden_layers, "Loading repeating layers") {
let rotary_emb = Arc::new(PhiRotaryEmbedding::new(
vb.dtype(),
cfg.clone(),
5 changes: 3 additions & 2 deletions mistralrs-core/src/models/quantized_llama.rs
@@ -10,6 +10,7 @@ use crate::layers::{repeat_kv, CausalMasker, MatMul, QRmsNorm, ScaledDotProductA
use crate::pipeline::{extract_logits, Cache};
use crate::utils::gguf_metadata::ContentMetadata;
use crate::utils::model_config as ModelConfig;
use crate::utils::progress::NiceProgressBar;
use crate::DeviceMapMetadata;

const MAX_SEQ_LEN: u32 = 4096;
@@ -211,7 +212,7 @@ impl ModelConfig::FromGGML for ModelWeights {
let norm = QRmsNorm::new(ct.remove("norm.weight")?, 1e-5)?;
let output = ct.remove("output.weight")?;
let mut layers = Vec::with_capacity(ct.hparams.n_layer as usize);
for layer_idx in 0..ct.hparams.n_layer {
for layer_idx in NiceProgressBar(0..ct.hparams.n_layer, "Loading repeating layers") {
let prefix = format!("layers.{layer_idx}");
let attention_wq = ct.remove(&format!("{prefix}.attention.wq.weight"))?;
let attention_wk = ct.remove(&format!("{prefix}.attention.wk.weight"))?;
@@ -348,7 +349,7 @@ impl ModelConfig::FromGGUF for ModelWeights {

let mapper = mapper.into_mapper(block_count, device)?;

for layer_idx in 0..block_count {
for layer_idx in NiceProgressBar(0..block_count, "Loading repeating layers") {
let prefix = format!("blk.{layer_idx}");
let device = mapper.device_for(layer_idx, false).unwrap_or(device);
let rotary = RotaryEmbedding::new_partial(
3 changes: 2 additions & 1 deletion mistralrs-core/src/models/quantized_phi2.rs
@@ -11,6 +11,7 @@ use crate::layers::{repeat_kv, CausalMasker, QLinear};
use crate::pipeline::{extract_logits, Cache};
use crate::utils::gguf_metadata::ContentMetadata;
use crate::utils::model_config as ModelConfig;
use crate::utils::progress::NiceProgressBar;
use crate::DeviceMapMetadata;

pub const MAX_SEQ_LEN: usize = 4096;
@@ -228,7 +229,7 @@ impl ModelConfig::FromGGUF for ModelWeights {

let mapper = mapper.into_mapper(block_count, device)?;

for layer_idx in 0..block_count {
for layer_idx in NiceProgressBar(0..block_count, "Loading repeating layers") {
let prefix = format!("blk.{layer_idx}");
let device = mapper.device_for(layer_idx, false).unwrap_or(device);

3 changes: 2 additions & 1 deletion mistralrs-core/src/models/quantized_phi3.rs
@@ -5,6 +5,7 @@ use crate::layers::{repeat_kv, CausalMasker, MatMul, RmsNorm, ScaledDotProductAt
use crate::pipeline::Cache;
use crate::utils::gguf_metadata::ContentMetadata;
use crate::utils::model_config as ModelConfig;
use crate::utils::progress::NiceProgressBar;
use crate::DeviceMapMetadata;
use candle_core::quantized::gguf_file;
use candle_core::quantized::QMatMul;
@@ -241,7 +242,7 @@ impl ModelConfig::FromGGUF for ModelWeights {

let mapper = mapper.into_mapper(block_count, device)?;

for layer_idx in 0..block_count {
for layer_idx in NiceProgressBar(0..block_count, "Loading repeating layers") {
let prefix = format!("blk.{layer_idx}");
let device = mapper.device_for(layer_idx, false).unwrap_or(device);
let ffn_up = QMatMul::from_qtensor(ct.tensor(
3 changes: 2 additions & 1 deletion mistralrs-core/src/models/qwen2.rs
@@ -8,6 +8,7 @@ use crate::{
device_map::DeviceMapper,
layers::{repeat_kv, CausalMasker, MatMul, QLinear, RmsNorm, ScaledDotProductAttention},
pipeline::{extract_logits, Cache, IsqModel, NormalLoadingMetadata, NormalModel},
utils::progress::NiceProgressBar,
};

#[derive(Debug, Clone, PartialEq, serde::Deserialize)]
@@ -284,7 +285,7 @@ impl Model {
let mut layers = Vec::with_capacity(cfg.num_hidden_layers);
let head_dim = cfg.hidden_size / cfg.num_attention_heads;
let vb_l = vb_m.pp("layers");
for layer_idx in 0..cfg.num_hidden_layers {
for layer_idx in NiceProgressBar(0..cfg.num_hidden_layers, "Loading repeating layers") {
let rotary_emb = Arc::new(RotaryEmbedding::new(
cfg.rope_theta as f32,
head_dim,
23 changes: 23 additions & 0 deletions mistralrs-core/src/utils/progress.rs
@@ -1,6 +1,7 @@
use std::thread::JoinHandle;

use either::Either;
use indicatif::{ProgressBar, ProgressBarIter, ProgressIterator, ProgressStyle};
use tqdm::Iter;

// Optionally display a progress bar via the `tqdm` crate:
@@ -101,3 +102,25 @@ impl Parellelize {
Either::Right(NonThreadingHandle { f })
}
}

/// Nice progress bar over an iterator, with a message.
pub struct NiceProgressBar<T: ExactSizeIterator>(pub T, pub &'static str);

impl<T: ExactSizeIterator> IntoIterator for NiceProgressBar<T> {
type IntoIter = ProgressBarIter<T>;
type Item = T::Item;

fn into_iter(self) -> Self::IntoIter {
let bar = ProgressBar::new(self.0.len() as u64);
bar.set_style(
ProgressStyle::default_bar()
.template(&format!(
"{}: [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{eta}})",
self.1
))
.unwrap()
.progress_chars("#>-"),
);
self.0.progress_with(bar)
}
}
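
For reference, here is a minimal, self-contained sketch of the new utility in isolation: the struct and `into_iter` body mirror the implementation added above, while the `main` function is a hypothetical driver (assuming indicatif 0.17+, where `template` returns a `Result`) that ticks the bar once per simulated layer:

```rust
use indicatif::{ProgressBar, ProgressBarIter, ProgressIterator, ProgressStyle};

/// Pairs an exact-size iterator with a static label; `into_iter` attaches a styled bar.
pub struct NiceProgressBar<T: ExactSizeIterator>(pub T, pub &'static str);

impl<T: ExactSizeIterator> IntoIterator for NiceProgressBar<T> {
    type IntoIter = ProgressBarIter<T>;
    type Item = T::Item;

    fn into_iter(self) -> Self::IntoIter {
        // Bar length comes from the wrapped iterator; the label prefixes the template.
        let bar = ProgressBar::new(self.0.len() as u64);
        bar.set_style(
            ProgressStyle::default_bar()
                .template(&format!(
                    "{}: [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{eta}})",
                    self.1
                ))
                .unwrap()
                .progress_chars("#>-"),
        );
        self.0.progress_with(bar)
    }
}

fn main() {
    // Hypothetical stand-in for a model loader: 32 "layers", 25 ms each.
    for layer_idx in NiceProgressBar(0..32, "Loading repeating layers") {
        std::thread::sleep(std::time::Duration::from_millis(25));
        let _ = layer_idx;
    }
}
```

This is the same call shape each `for layer_idx in ...` loop in the model files switches to.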
3 changes: 2 additions & 1 deletion mistralrs-core/src/vision_models/phi3.rs
@@ -21,6 +21,7 @@ use crate::{
extract_logits, Cache, IsqModel, NormalLoadingMetadata, Phi3RopeScaling, VisionModel,
},
serde_default_fn,
utils::progress::NiceProgressBar,
vision_models::clip::{Activation, ClipConfig, ClipVisionTransformer},
};

@@ -775,7 +776,7 @@ impl Model {
)?;
let mut layers = Vec::with_capacity(cfg.num_hidden_layers);
let vb_l = vb_m.pp("layers");
for layer_idx in 0..cfg.num_hidden_layers {
for layer_idx in NiceProgressBar(0..cfg.num_hidden_layers, "Loading repeating layers") {
let rotary_emb = Arc::new(PhiRotaryEmbedding::new(
vb.dtype(),
cfg.clone(),
3 changes: 2 additions & 1 deletion mistralrs-core/src/xlora_models/gemma.rs
@@ -6,6 +6,7 @@ use crate::{
layers::ScaledDotProductAttention,
lora::{linear_b as linear, LinearLayerLike, LoraConfig, Ordering},
pipeline::{IsqModel, NormalLoadingMetadata},
utils::progress::NiceProgressBar,
};
use candle_core::{quantized::QMatMul, DType, Device, Module, Result, Tensor, D};
use candle_nn::{RotaryEmbedding, VarBuilder};
@@ -487,7 +488,7 @@ impl XLoraModel {
let mut layers = Vec::with_capacity(cfg.num_hidden_layers);
let vb_l = vb_m.pp("layers");
let mut count = 0;
for layer_idx in 0..cfg.num_hidden_layers {
for layer_idx in NiceProgressBar(0..cfg.num_hidden_layers, "Loading repeating layers") {
let rotary_emb = Arc::new(RotaryEmbedding::new(
cfg.rope_theta as f32,
cfg.head_dim,
61 changes: 32 additions & 29 deletions mistralrs-core/src/xlora_models/llama.rs
@@ -4,6 +4,7 @@ use crate::{
layers::ScaledDotProductAttention,
lora::{linear_no_bias as linear, LinearLayerLike, LoraConfig, Ordering},
pipeline::IsqModel,
utils::progress::NiceProgressBar,
};
use candle_core::{quantized::QMatMul, DType, Device, Result, Tensor};
use candle_nn::{embedding, Embedding, Module, RotaryEmbedding, VarBuilder};
@@ -577,36 +578,38 @@ impl XLoraLlama {
)?;
let mut count = 0;
let head_dim = cfg.hidden_size / cfg.num_attention_heads;
let mut blocks: Vec<_> = (0..cfg.num_hidden_layers)
.map(|i| {
let rotary_emb = Arc::new(
RotaryEmbedding::new(
cfg.rope_theta,
head_dim,
cfg.max_position_embeddings,
mapper
.device_for(i, false)
.unwrap_or(&normal_loading_metadata.real_device),
is_gptx,
vb.dtype(),
let mut blocks: Vec<_> =
NiceProgressBar(0..cfg.num_hidden_layers, "Loading repeating layers")
.into_iter()
.map(|i| {
let rotary_emb = Arc::new(
RotaryEmbedding::new(
cfg.rope_theta,
head_dim,
cfg.max_position_embeddings,
mapper
.device_for(i, false)
.unwrap_or(&normal_loading_metadata.real_device),
is_gptx,
vb.dtype(),
)
.expect("Failed to create RoPE"),
);
Block::load(
vb.pp(&format!("model.layers.{i}")),
cfg,
lora_config,
&mut count,
&xlora_ordering,
&*mapper,
i,
normal_loading_metadata.loading_isq,
rotary_emb,
preload_adapters,
)
.expect("Failed to create RoPE"),
);
Block::load(
vb.pp(&format!("model.layers.{i}")),
cfg,
lora_config,
&mut count,
&xlora_ordering,
&*mapper,
i,
normal_loading_metadata.loading_isq,
rotary_emb,
preload_adapters,
)
.expect("Failed to load block.")
})
.collect();
.expect("Failed to load block.")
})
.collect();
if xlora_config.is_none() && preload_adapters.is_none() {
// We are now a LoRA model so we must merge the weights
info!("Merging LoRA adapters.");
3 changes: 2 additions & 1 deletion mistralrs-core/src/xlora_models/mistral.rs
@@ -4,6 +4,7 @@ use crate::{
layers::ScaledDotProductAttention,
lora::{linear_no_bias, LinearLayerLike, LoraConfig, Ordering},
pipeline::{IsqModel, NormalLoadingMetadata},
utils::progress::NiceProgressBar,
};
/// Mistral LLM, https://github.com/mistralai/mistral-src
use candle_core::{quantized::QMatMul, DType, Device, Module, Result, Tensor};
@@ -455,7 +456,7 @@ impl XLoraModel {
let mut layers = Vec::with_capacity(cfg.num_hidden_layers);
let vb_l = vb_m.pp("layers");
let mut count = 0;
for layer_idx in 0..cfg.num_hidden_layers {
for layer_idx in NiceProgressBar(0..cfg.num_hidden_layers, "Loading repeating layers") {
let rotary_emb = Arc::new(RotaryEmbedding::new(
cfg.rope_theta as f32,
head_dim,
3 changes: 2 additions & 1 deletion mistralrs-core/src/xlora_models/mixtral.rs
@@ -4,6 +4,7 @@ use crate::{
layers::{MatMul, ScaledDotProductAttention},
lora::{linear_no_bias, LinearLayerLike, LoraConfig, Ordering},
pipeline::{IsqModel, NormalLoadingMetadata},
utils::progress::NiceProgressBar,
};
/// Mixtral Model
/// https://github.com/huggingface/transformers/blob/main/src/transformers/models/mixtral/modeling_mixtral.py
@@ -593,7 +594,7 @@ impl XLoraModel {
let mut layers = Vec::with_capacity(cfg.num_hidden_layers);
let vb_l = vb_m.pp("layers");
let mut count = 0;
for layer_idx in 0..cfg.num_hidden_layers {
for layer_idx in NiceProgressBar(0..cfg.num_hidden_layers, "Loading repeating layers") {
let rotary_emb = Arc::new(RotaryEmbedding::new(
cfg.rope_theta as f32,
head_dim,
3 changes: 2 additions & 1 deletion mistralrs-core/src/xlora_models/phi2.rs
@@ -6,6 +6,7 @@ use crate::{
layers::ScaledDotProductAttention,
lora::{linear, LinearLayerLike, LoraConfig, Ordering},
pipeline::{IsqModel, NormalLoadingMetadata},
utils::progress::NiceProgressBar,
};
/// Phi model.
/// https://huggingface.co/microsoft/phi-2
@@ -448,7 +449,7 @@ impl Model {
let mut layers = Vec::with_capacity(cfg.num_hidden_layers);
let vb_m = vb_m.pp("layers");
let mut count = 0;
for layer_idx in 0..cfg.num_hidden_layers {
for layer_idx in NiceProgressBar(0..cfg.num_hidden_layers, "Loading repeating layers") {
// Alternative rope scalings are not supported.
let rotary_emb = RotaryEmbedding::new_partial(
cfg.rope_theta,
3 changes: 2 additions & 1 deletion mistralrs-core/src/xlora_models/phi3.rs
@@ -6,6 +6,7 @@ use crate::{
layers::ScaledDotProductAttention,
lora::{linear_no_bias, LinearLayerLike, LoraConfig, Ordering},
pipeline::{IsqModel, NormalLoadingMetadata},
utils::progress::NiceProgressBar,
};
use candle_core::{quantized::QMatMul, DType, Device, Module, Result, Tensor, D};
use candle_nn::VarBuilder;
@@ -403,7 +404,7 @@ impl Model {
let mut layers = Vec::with_capacity(cfg.num_hidden_layers);
let vb_l = vb_m.pp("layers");
let mut count = 0;
for layer_idx in 0..cfg.num_hidden_layers {
for layer_idx in NiceProgressBar(0..cfg.num_hidden_layers, "Loading repeating layers") {
let rotary_emb = Arc::new(PhiRotaryEmbedding::new(
vb.dtype(),
cfg.clone(),