Skip to content

Commit

Permalink
Minor fix and tests.
Browse files (browse the repository at this point in the history)
  • Loading branch information
zxybazh committed Nov 27, 2023
1 parent a509ded commit fd0e02b
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 9 deletions.
2 changes: 0 additions & 2 deletions serve/mlc_serve/api/handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,6 @@ def create_error_response(status_code: HTTPStatus, message: str) -> JSONResponse


router = APIRouter()
import logging
logger = logging.getLogger(__name__)

def _get_sampling_params(request: ChatCompletionRequest) -> SamplingParams:
sampling_params = SamplingParams(
Expand Down
4 changes: 2 additions & 2 deletions serve/mlc_serve/model/paged_cache_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -658,11 +658,11 @@ def generate(
return [
TextGenerationResult(
sequence_id=sequence_id,
generated_tokens=[next_token],
generated_tokens=[new_token],
error=None,
logprob_info=fetch_logprobs(logprob_info, index, sampling_params[index]),
)
for index, (sequence_id, next_token) in enumerate(zip(sequence_ids, next_tokens))
for index, (sequence_id, new_token) in enumerate(zip(sequence_ids, next_tokens))
]
except RuntimeError:
# Fallback to per-token sampling in case some logits values are corrupted.
Expand Down
10 changes: 5 additions & 5 deletions serve/tests/unittest/test_engine_with_samplers.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,16 +223,16 @@ def test_stop(
def test_logprobs(
model_artifact_path,
use_staging_engine,
max_num_batched_tokens=2560,
max_input_len=2560,
max_num_sequences=4,
max_input_len=512,
num_requests=5,
logprobs=3,
):
prompt = "hi"
engine = create_engine(
model_artifact_path,
use_staging_engine,
max_num_batched_tokens,
model_artifact_path,
use_staging_engine,
max_num_sequences,
max_input_len,
)
s = 113
Expand Down

0 comments on commit fd0e02b

Please sign in to comment.