This repository has been archived by the owner on Oct 11, 2024. It is now read-only.

Only import magic_wand if sparsity is enabled (#37)
Tested by uninstalling magic_wand and verifying that this code for a dense model runs fine:
```python
from vllm import LLM, SamplingParams
model = LLM("nm-testing/opt-125m-pruned2.4", enforce_eager=True)
```

Then testing a sparse model run, which now fails with a clear error when magic_wand is missing:
```python
from vllm import LLM, SamplingParams
model = LLM("nm-testing/opt-125m-pruned2.4", sparsity="sparse_w16a16", enforce_eager=True)
```
output:
```
...
  File "/home/michael/code/neuralmagic-vllm/vllm/model_executor/weight_utils.py", line 93, in get_sparse_config
    from vllm.model_executor.layers.sparsity import get_sparsity_config
  File "/home/michael/code/neuralmagic-vllm/vllm/model_executor/layers/sparsity/__init__.py", line 6, in <module>
    raise ValueError(
ValueError: magic_wand is not available and required for sparsity support. Please install it with `pip install magic_wand`
```
mgoin authored Feb 21, 2024
1 parent 64bdde5 commit a3f00c5
Showing 3 changed files with 25 additions and 7 deletions.
14 changes: 13 additions & 1 deletion vllm/model_executor/layers/parameters/lazy_compressed.py
```diff
@@ -1,9 +1,15 @@
 import numpy
 import torch
 from torch.utils._pytree import tree_map
+import importlib.util
 
 from typing import Type
-from magic_wand import (CompressedStorageFormat, SparseBitmaskStorageFormat)
+
+is_magic_wand_available = importlib.util.find_spec("magic_wand") is not None
+
+# These are types from magic_wand, but we only want to import if required
+CompressedStorageFormat = "CompressedStorageFormat"
+SparseBitmaskStorageFormat = "SparseBitmaskStorageFormat"
 
 
 class LazyCompressedParameter(torch.Tensor):
```
```diff
@@ -14,6 +20,12 @@ def __new__(cls,
                 storage_format_cls: Type[
                     CompressedStorageFormat] = SparseBitmaskStorageFormat,
                 compress_transposed: bool = False):
+
+        if not is_magic_wand_available:
+            raise ValueError(
+                "magic_wand is not available and required for sparsity "
+                "support. Please install it with `pip install magic_wand`")
+
         self = torch.Tensor._make_wrapper_subclass(
             cls,
             size=uncompressed_data.shape,
```
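For context, the guard above is a standard optional-dependency pattern: `importlib.util.find_spec` probes for the package without importing it, so module load stays cheap and the hard failure is deferred to the point of use. A minimal sketch of the same pattern, using a hypothetical package name `optional_dep`:

```python
import importlib.util

# find_spec probes the import machinery without importing the package, so
# this check is cheap and has no side effects at module load time.
is_optional_dep_available = importlib.util.find_spec("optional_dep") is not None


def feature_that_needs_optional_dep():
    # Fail loudly only when the optional feature is actually used.
    if not is_optional_dep_available:
        raise ValueError(
            "optional_dep is not available and required for this feature. "
            "Please install it with `pip install optional_dep`")
    from optional_dep import do_work  # deferred import (hypothetical)
    return do_work()
```

With this layout, importing the host module succeeds on a machine without `optional_dep`, and only calling `feature_that_needs_optional_dep()` raises.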
13 changes: 10 additions & 3 deletions vllm/model_executor/layers/sparsity/__init__.py
```diff
@@ -1,8 +1,15 @@
 from typing import Type
+import importlib.util
 
-from vllm.model_executor.layers.sparsity.base_config import SparsityConfig
-from vllm.model_executor.layers.sparsity.sparse_w16a16 import SparseW16A16Config
-from vllm.model_executor.layers.sparsity.semi_structured_sparse_w16a16 import SemiStructuredSparseW16A16Config
+is_magic_wand_available = importlib.util.find_spec("magic_wand") is not None
+if not is_magic_wand_available:
+    raise ValueError(
+        "magic_wand is not available and required for sparsity "
+        "support. Please install it with `pip install magic_wand`")
+
+from vllm.model_executor.layers.sparsity.base_config import SparsityConfig  # noqa: E402
+from vllm.model_executor.layers.sparsity.sparse_w16a16 import SparseW16A16Config  # noqa: E402
+from vllm.model_executor.layers.sparsity.semi_structured_sparse_w16a16 import SemiStructuredSparseW16A16Config  # noqa: E402
 
 _SPARSITY_CONFIG_REGISTRY = {
     "sparse_w16a16": SparseW16A16Config,
```
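Because the guard now runs at the top of the subpackage's `__init__.py`, merely importing `vllm.model_executor.layers.sparsity` raises when magic_wand is missing; this is exactly why `weight_utils.py` (next diff) moves its import inside the function. A hedged illustration of that import-time behavior, assuming magic_wand is uninstalled:

```python
# With magic_wand uninstalled, the subpackage import itself raises,
# before any sparsity class is even referenced.
try:
    from vllm.model_executor.layers.sparsity import get_sparsity_config
except ValueError as err:
    print(err)
    # magic_wand is not available and required for sparsity support.
    # Please install it with `pip install magic_wand`
```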
5 changes: 2 additions & 3 deletions vllm/model_executor/weight_utils.py
```diff
@@ -17,8 +17,6 @@
 from vllm.logger import init_logger
 from vllm.model_executor.layers.quantization import (get_quantization_config,
                                                      QuantizationConfig)
-from vllm.model_executor.layers.sparsity import (get_sparsity_config,
-                                                 SparsityConfig)
 from vllm.model_executor.layers.parameters import LazyCompressedParameter
 
 logger = init_logger(__name__)
@@ -91,7 +89,8 @@ def get_sparse_config(
     model_name_or_path: str,
     hf_config: PretrainedConfig,
     cache_dir: Optional[str] = None,
-) -> SparsityConfig:
+):
+    from vllm.model_executor.layers.sparsity import get_sparsity_config
     sparsity_cls = get_sparsity_config(sparsity)
     hf_sparsity_config = getattr(hf_config, "sparsity_config", None)
    if hf_sparsity_config is not None:
```
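One subtlety in the last hunk: the `-> SparsityConfig` return annotation is dropped because, without `from __future__ import annotations`, the annotation is evaluated when the function is defined and would therefore require the module-level import the commit is removing. A hedged sketch, under hypothetical names, of an alternative that keeps the annotation for type checkers while still deferring the runtime import:

```python
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Seen only by type checkers, never executed at runtime, so the
    # optional dependency's import-time guard is not triggered.
    from heavy_backend.configs import HeavyConfig  # hypothetical


def get_config(kind: str) -> "HeavyConfig":  # string annotation, not evaluated
    # Deferred runtime import: the guard fires only if this path runs.
    from heavy_backend.configs import get_config_cls  # hypothetical
    return get_config_cls(kind)
```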
