diff --git a/.github/workflows/ruff.yml b/.github/workflows/ruff.yml index bd38d11872dc4..8f8f5ee3cc70c 100644 --- a/.github/workflows/ruff.yml +++ b/.github/workflows/ruff.yml @@ -25,7 +25,10 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install ruff==0.1.5 + pip install ruff==0.1.5 codespell==2.2.6 tomli==2.0.1 - name: Analysing the code with ruff run: | ruff vllm tests + - name: Spelling check with codespell + run: | + codespell --toml pyproject.toml \ No newline at end of file diff --git a/benchmarks/benchmark_serving.py b/benchmarks/benchmark_serving.py index ff5609c37febf..7d389a9c7d703 100644 --- a/benchmarks/benchmark_serving.py +++ b/benchmarks/benchmark_serving.py @@ -375,7 +375,7 @@ def main(args: argparse.Namespace): parser.add_argument( "--disable-tqdm", action="store_true", - help="Specify to disbale tqdm progress bar.", + help="Specify to disable tqdm progress bar.", ) parser.add_argument( "--save-result", diff --git a/format.sh b/format.sh index c78108869659d..eb2c5ab031626 100755 --- a/format.sh +++ b/format.sh @@ -24,6 +24,7 @@ builtin cd "$ROOT" || exit 1 YAPF_VERSION=$(yapf --version | awk '{print $2}') RUFF_VERSION=$(ruff --version | awk '{print $2}') MYPY_VERSION=$(mypy --version | awk '{print $2}') +CODESPELL_VERSION=$(codespell --version) # # params: tool name, tool version, required version tool_version_check() { @@ -36,6 +37,7 @@ tool_version_check() { tool_version_check "yapf" $YAPF_VERSION "$(grep yapf requirements-dev.txt | cut -d'=' -f3)" tool_version_check "ruff" $RUFF_VERSION "$(grep "ruff==" requirements-dev.txt | cut -d'=' -f3)" tool_version_check "mypy" "$MYPY_VERSION" "$(grep mypy requirements-dev.txt | cut -d'=' -f3)" +tool_version_check "codespell" "$CODESPELL_VERSION" "$(grep codespell requirements-dev.txt | cut -d'=' -f3)" YAPF_FLAGS=( '--recursive' @@ -93,6 +95,47 @@ echo 'vLLM yapf: Done' # echo 'vLLM mypy:' # mypy +# check spelling of specified files +spell_check() { + codespell 
"$@" +} + +spell_check_all(){ + codespell --toml pyproject.toml +} + +# Spelling check of files that differ from main branch. +spell_check_changed() { + # The `if` guard ensures that the list of filenames is not empty, which + # could cause codespell to receive 0 positional arguments instead of the + # list of changed files. + # + # `diff-filter=ACM` and $MERGEBASE ensure we only spell-check files that + # exist on both branches. + MERGEBASE="$(git merge-base origin/main HEAD)" + + if ! git diff --diff-filter=ACM --quiet --exit-code "$MERGEBASE" -- '*.py' '*.pyi' &>/dev/null; then + git diff --name-only --diff-filter=ACM "$MERGEBASE" -- '*.py' '*.pyi' | xargs \ + codespell + fi +} + +# Run Codespell +## This flag runs spell check of individual files. --files *must* be the first command line +## arg to use this option. +if [[ "$1" == '--files' ]]; then + spell_check "${@:2}" + # If `--all` is passed, then any further arguments are ignored and the + # entire directory tree is spell-checked. +elif [[ "$1" == '--all' ]]; then + spell_check_all +else + # Check spelling only of the files that differ from the merge base with origin/main. + spell_check_changed +fi +echo 'vLLM codespell: Done' + + # Lint specified files lint() { ruff "$@" @@ -117,9 +160,9 @@ lint_changed() { } # Run Ruff -echo 'vLLM Ruff:' -## This flag lints individual files. --files *must* be the first command line -## arg to use this option. +echo 'vLLM ruff:' +### This flag lints individual files. --files *must* be the first command line +### arg to use this option. if [[ "$1" == '--files' ]]; then lint "${@:2}" # If `--all` is passed, then any further arguments are ignored and the @@ -139,3 +182,5 @@ if ! git diff --quiet &>/dev/null; then exit 1 fi + + diff --git a/mypy.ini b/mypy.ini deleted file mode 100644 index 55c4248ea9d26..0000000000000 --- a/mypy.ini +++ /dev/null @@ -1,8 +0,0 @@ -[mypy] -python_version = 3.8 - -ignore_missing_imports = True - -files = vllm -# TODO(woosuk): Include the code from Megatron and HuggingFace. 
-exclude = vllm/model_executor/parallel_utils/|vllm/model_executor/models/ diff --git a/pyproject.toml b/pyproject.toml index b197256f6ff55..c5db016cebdb7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,4 +31,22 @@ ignore = [ "E731", # line too long, handled by black formatting "E501", + # .strip() with multi-character strings + "B005", + # Loop control variable not used within loop body + "B007", ] + +[tool.mypy] +python_version = "3.8" + +ignore_missing_imports = true + +files = "vllm" +# TODO(woosuk): Include the code from Megatron and HuggingFace. +exclude = "vllm/model_executor/parallel_utils/|vllm/model_executor/models/" + + +[tool.codespell] +ignore-words-list = "dout, te, indicies" +skip = "./tests/prompts" diff --git a/requirements-dev.txt b/requirements-dev.txt index f8126008d0794..b54a2773249cf 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,7 +1,9 @@ # formatting yapf==0.32.0 toml==0.10.2 +tomli==2.0.1 ruff==0.1.5 +codespell==2.2.6 # type checking mypy==0.991 diff --git a/tests/lora/test_layers.py b/tests/lora/test_layers.py index f739bbeaab334..18ce300449dbf 100644 --- a/tests/lora/test_layers.py +++ b/tests/lora/test_layers.py @@ -279,7 +279,7 @@ def create_random_embedding_layer(): 256, org_num_embeddings=512) expanded_embedding.weight.data[:512, :] = embedding_data - # We need to deepcopy the embedding as it will be modifed + # We need to deepcopy the embedding as it will be modified # in place lora_embedding = VocabParallelEmbeddingWithLoRA( deepcopy(expanded_embedding)) diff --git a/tests/lora/test_llama.py b/tests/lora/test_llama.py index 06fbf19eea824..dfaf8c700695a 100644 --- a/tests/lora/test_llama.py +++ b/tests/lora/test_llama.py @@ -15,7 +15,7 @@ def do_sample(llm, lora_path: str, lora_id: int): "[user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_95 (one_mora VARCHAR, gloss VARCHAR, accented_mora VARCHAR)\n\n question: What is the one mora for a low tone 
mora with a gloss of /˩okiru/ [òkìɽɯ́]? [/user] [assistant]", "[user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE candidate (people_id VARCHAR, unsure_rate INTEGER); CREATE TABLE people (sex VARCHAR, people_id VARCHAR)\n\n question: which gender got the highest average uncertain ratio. [/user] [assistant]", "[user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_60 (pick INTEGER, former_wnba_team VARCHAR)\n\n question: What pick was a player that previously played for the Minnesota Lynx? [/user] [assistant]", - "[user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_28138035_4 (womens_doubles VARCHAR, mens_singles VARCHAR)\n\n question: Name the womens doubles for werner schlager [/user] [assistant]" + "[user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_28138035_4 (womens_doubles VARCHAR, mens_singles VARCHAR)\n\n question: Name the women's doubles for werner schlager [/user] [assistant]" ] sampling_params = vllm.SamplingParams(temperature=0, max_tokens=256, @@ -53,7 +53,7 @@ def test_llama_lora(sql_lora_files, tp_size): "\n\n answer: 1\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_96 (one_mora VARCHAR, gloss VARCHAR, accented_mora VARCHAR)\n\n question: What is the one mora for a high tone mora with a gloss of /˧kot/ [kòt]? [/user] [assistant]\n\n answer: 2\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_97 (one_mora VARCHAR, gloss VARCHAR, accented_mora VARCHAR)\n\n question: What is the one mora for a high tone mora with a gloss of /˧kot/ [kòt]? 
[/user] [assistant]\n\n answer: 2\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_98 (one_mora VARCHAR, gloss VARCHAR, accented_mora VARCHAR)\n\n question: What is the one m", " Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE candidate (people_id VARCHAR, unsure_rate INTEGER); CREATE TABLE people (sex VARCHAR, people_id VARCHAR)\n\n question: which gender got the highest average uncertain ratio. ", " Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_60 (pick INTEGER, former_wnba_team VARCHAR)\n\n question: What pick was a player that previously played for the Minnesota Lynx? ", - "\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_28138035_4 (womens_doubles VARCHAR, mens_singles VARCHAR)\n\n question: Name the womens doubles for werner schlager [/user] [assistant]\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_28138035_4 (womens_doubles VARCHAR, mens_singles VARCHAR)\n\n question: Name the womens doubles for werner schlager [/user] [assistant]\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_28138035_4 (womens_doubles VARCHAR, mens_singles VARCHAR)\n\n question: Name the womens doubles for werner schlager [/user] [assistant]\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE", + "\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_28138035_4 (womens_doubles VARCHAR, mens_singles VARCHAR)\n\n question: Name the women's doubles for werner schlager [/user] [assistant]\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_28138035_4 (womens_doubles 
VARCHAR, mens_singles VARCHAR)\n\n question: Name the women's doubles for werner schlager [/user] [assistant]\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_28138035_4 (womens_doubles VARCHAR, mens_singles VARCHAR)\n\n question: Name the women's doubles for werner schlager [/user] [assistant]\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE", ] expected_lora_output = [ " SELECT icao FROM table_name_74 WHERE airport = 'lilongwe international airport' ", diff --git a/vllm/core/block_manager.py b/vllm/core/block_manager.py index 7f91051f03ac1..3946096d4296a 100644 --- a/vllm/core/block_manager.py +++ b/vllm/core/block_manager.py @@ -178,7 +178,7 @@ def append_slot(self, seq: Sequence) -> Optional[Tuple[int, int]]: if len(block_table) < len(logical_blocks): if (self.block_sliding_window and len(block_table) >= self.block_sliding_window): - # re-use a block + # reuse a block block_table.append(block_table[len(block_table) % self.block_sliding_window]) else: diff --git a/vllm/core/scheduler.py b/vllm/core/scheduler.py index f4ac2d6dc59fe..5e7cc3091d775 100644 --- a/vllm/core/scheduler.py +++ b/vllm/core/scheduler.py @@ -158,7 +158,7 @@ def get_num_unfinished_seq_groups(self) -> int: return len(self.waiting) + len(self.running) + len(self.swapped) def _schedule(self) -> SchedulerOutputs: - # Blocks that need to be swaped or copied before model execution. + # Blocks that need to be swapped or copied before model execution. 
blocks_to_swap_in: Dict[int, int] = {} blocks_to_swap_out: Dict[int, int] = {} blocks_to_copy: Dict[int, List[int]] = {} diff --git a/vllm/lora/punica.py b/vllm/lora/punica.py index 307a33dcf2820..fc74269e55876 100644 --- a/vllm/lora/punica.py +++ b/vllm/lora/punica.py @@ -87,7 +87,7 @@ def add_lora(y: torch.Tensor, r = wb_t_all.size(-1) if buffer is None: # We set the buffer to be float32 by default to avoid - # numerical innacuracies that would otherwise happen + # numerical inaccuracies that would otherwise happen # due to downcasting. buffer = torch.zeros((x.size(0), r), dtype=torch.float32, diff --git a/vllm/model_executor/layers/triton_kernel/prefix_prefill.py b/vllm/model_executor/layers/triton_kernel/prefix_prefill.py index ba40d42307fab..a1a2ab0c4805c 100644 --- a/vllm/model_executor/layers/triton_kernel/prefix_prefill.py +++ b/vllm/model_executor/layers/triton_kernel/prefix_prefill.py @@ -537,7 +537,7 @@ def _fwd_kernel_alibi( alibi_start_q = tl.arange( 0, BLOCK_M) + block_start_loc + cur_batch_ctx_len alibi_start_k = cur_batch_ctx_len - # # init debuger + # # init debugger # offset_db_q = tl.arange(0, BLOCK_M) + block_start_loc # offset_db_k = tl.arange(0, BLOCK_N) # calc q[BLOCK_M, BLOCK_MODEL] mul k[prefix_len: , BLOCK_DMODEL] diff --git a/vllm/model_executor/models/decilm.py b/vllm/model_executor/models/decilm.py index 07aa4b72bf7a0..abf4a462871b0 100644 --- a/vllm/model_executor/models/decilm.py +++ b/vllm/model_executor/models/decilm.py @@ -41,7 +41,7 @@ class DeciLMForCausalLM(LlamaForCausalLM): Based on the llama executor. The main difference is that DeciLM uses Variable Grouped Query Attention. - The constant number of GQA heads in the decoder is overriden with a value + The constant number of GQA heads in the decoder is overridden with a value per layer. 
Usually, in the HuggingFace implementation, instead of diff --git a/vllm/model_executor/parallel_utils/custom_all_reduce.py b/vllm/model_executor/parallel_utils/custom_all_reduce.py index ce4c8d02f7694..0c749c0484fc5 100644 --- a/vllm/model_executor/parallel_utils/custom_all_reduce.py +++ b/vllm/model_executor/parallel_utils/custom_all_reduce.py @@ -36,14 +36,14 @@ def init_custom_ar() -> None: if world_size not in _SUPPORTED_WORLD_SIZES: logger.warn( "Custom allreduce is disabled due to an unsupported world size: " - "%d. Supported world sizes: %s. To slience this warning, specify" + "%d. Supported world sizes: %s. To silence this warning, specify" "disable_custom_all_reduce=True explicitly.", world_size, str(_SUPPORTED_WORLD_SIZES)) return if not _can_p2p(rank, world_size): logger.warn( "Custom allreduce is disabled because your platform lacks GPU P2P" - " capability. To slience this warning, specify" + " capability. To silence this warning, specify" "disable_custom_all_reduce=True explicitly.") return _CA_HANDLE = CustomAllreduce(rank, world_size) diff --git a/vllm/model_executor/parallel_utils/parallel_state.py b/vllm/model_executor/parallel_utils/parallel_state.py index aeb07f64c37dc..c821936d06e4e 100644 --- a/vllm/model_executor/parallel_utils/parallel_state.py +++ b/vllm/model_executor/parallel_utils/parallel_state.py @@ -189,7 +189,7 @@ def get_pipeline_model_parallel_next_rank(): def get_pipeline_model_parallel_prev_rank(): - """Return the global rank that preceeds the caller in the pipeline""" + """Return the global rank that precedes the caller in the pipeline""" assert _PIPELINE_GLOBAL_RANKS is not None, ( "Pipeline parallel group is not initialized") rank_in_pipeline = get_pipeline_model_parallel_rank() diff --git a/vllm/utils.py b/vllm/utils.py index d7a3a3a2a9ef9..6206879929061 100644 --- a/vllm/utils.py +++ b/vllm/utils.py @@ -204,7 +204,7 @@ def _generate_random_fp8_e5m2( # NOTE(zhaoyang): Due to NaN and Inf representation for fp8 data type, # it 
may occur Inf or NaN if we directly use torch.randint # to generate random data for fp8 data. - # For example, s.11111.00 in fp8e5m2 format repesents Inf. + # For example, s.11111.00 in fp8e5m2 format represents Inf. # | E4M3 | E5M2 #-----|-------------|------------------- # Inf | N/A | s.11111.00