You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
When using the mistralrs library to process multiple requests in a loop, the blocking_recv call hangs indefinitely after the first iteration. This prevents the code from processing subsequent requests.
Code Sample
use anyhow::Result;
use mistralrs::{
Constraint, DefaultSchedulerMethod, Device, DeviceMapMetadata, GGUFLoaderBuilder,
GGUFSpecificConfig, MistralRs, MistralRsBuilder, ModelDType, NormalRequest, Request,
RequestMessage, ResponseOk, SamplingParams, SchedulerConfig, TokenSource,
};
use std::sync::Arc;
use tokio::sync::mpsc::channel;
/// Builds a GGUF loader for the local quantized model file, loads the model on
/// Metal device 0, and wraps the resulting pipeline in a `MistralRs` engine.
///
/// # Errors
/// Propagates a failure to create the Metal device or to load the model.
///
/// # Panics
/// The `unwrap` on the scheduler argument panics only if converting the
/// literal `5` into the type expected by `Fixed` fails.
fn setup() -> Result<Arc<MistralRs>> {
    // Arguments for the loader builder, named here for readability.
    // NOTE(review): the roles (chat template, model location, GGUF file list)
    // are inferred from the literal values — confirm against the mistralrs API.
    let chat_template = Some("chat_templates_llama2.json".to_string());
    let model_location = ".".to_string();
    let gguf_files = vec!["aixsatoshi-Honyaku-13b-Q4_0.gguf".to_string()];
    let gguf_config = GGUFSpecificConfig {
        prompt_batchsize: None,
        topology: None,
    };

    let loader = GGUFLoaderBuilder::new(
        chat_template,
        None,
        model_location,
        gguf_files,
        gguf_config,
    )
    .build();

    // Model loading is run through `block_in_place`, exactly as in the
    // original reproduction.
    let pipeline = tokio::task::block_in_place(|| {
        let device = Device::new_metal(0)?;
        loader.load_model_from_hf(
            None,
            TokenSource::None,
            &ModelDType::Auto,
            &device,
            false,
            DeviceMapMetadata::dummy(),
            None,
            None,
        )
    })?;

    let scheduler = SchedulerConfig::DefaultScheduler {
        method: DefaultSchedulerMethod::Fixed(5.try_into().unwrap()),
    };
    let engine = MistralRsBuilder::new(pipeline, scheduler).build();
    Ok(engine)
}
fn main() -> Result<()> {
let mistralrs = setup()?;
let text = std::env::args()
.nth(1)
.unwrap_or_else(|| "Hello world!".to_string());
let prompt = format!("<english>: {} <NL>\n\n<japanese>: ", text);
for i in 0..10 {
let (tx, mut rx) = channel(10_000);
let request = Request::Normal(NormalRequest {
messages: RequestMessage::Completion {
text: prompt.clone(),
echo_prompt: false,
best_of: 1,
},
sampling_params: SamplingParams::default(),
response: tx,
return_logprobs: false,
is_streaming: false,
id: i,
constraint: Constraint::None,
suffix: None,
adapters: None,
tools: None,
tool_choice: None,
logits_processors: None,
});
mistralrs.get_sender()?.blocking_send(request)?;
let response = rx.blocking_recv().unwrap().as_result()?;
let response_text = match response {
ResponseOk::CompletionDone(c) => c.choices[0].text.clone(),
_ => "Unexpected response".to_string(),
};
println!("Response: {}", response_text);
}
Ok(())
}
Latest commit or version
v0.3.0
The text was updated successfully, but these errors were encountered:
I'm closing this issue as I've opened a new one that provides a more comprehensive reproduction case of both the memory leak and channel closure issues.
Describe the bug
When using the mistralrs library to process multiple requests in a loop, the blocking_recv call hangs indefinitely after the first iteration. This prevents the code from processing subsequent requests.
Code Sample
Latest commit or version
v0.3.0
The text was updated successfully, but these errors were encountered: