Revert "add CPU autotp UT (#4263)" #4419

Closed
wants to merge 1 commit into from
Closed
3 changes: 1 addition & 2 deletions .github/workflows/cpu-inference.yml
@@ -76,5 +76,4 @@ jobs:
 source oneCCL/build/_install/env/setvars.sh
 unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch
 cd tests
-TRANSFORMERS_CACHE=~/tmp/transformers_cache/ TORCH_EXTENSIONS_DIR=./torch-extensions pytest -m 'seq_inference' unit/
-TRANSFORMERS_CACHE=~/tmp/transformers_cache/ TORCH_EXTENSIONS_DIR=./torch-extensions pytest -m 'inference_ops' -m 'inference' unit/
+TRANSFORMERS_CACHE=~/tmp/transformers_cache/ TORCH_EXTENSIONS_DIR=./torch-extensions pytest -m 'seq_inference' -m 'inference_ops' -m 'inference' unit/
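A note on the restored single-line command: pytest's `-m` flag is a plain "store" option, so passing it several times does not accumulate marker expressions; only the last one takes effect. A minimal sketch (not code from this PR; the marker names mirror the ones in the workflow above):

```python
# Minimal sketch of pytest marker selection; seq_inference and inference are
# the marker names used in the workflow command above.
import pytest

@pytest.mark.seq_inference
def test_sequential_inference_path():
    assert True

@pytest.mark.inference
def test_inference_path():
    assert True

# Repeated -m flags overwrite each other; the last expression wins:
#   pytest -m 'seq_inference' -m 'inference' test_markers.py
# runs only test_inference_path. To select several groups in one run,
# combine them into a single expression:
#   pytest -m 'seq_inference or inference' test_markers.py
```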
4 changes: 0 additions & 4 deletions tests/unit/hybrid_engine/test_he_all.py
@@ -12,10 +12,6 @@
 from deepspeed.accelerator import get_accelerator

 from transformers import (AutoConfig, AutoTokenizer, AutoModelForCausalLM)
-from deepspeed.ops.op_builder import InferenceBuilder
-
-if not deepspeed.ops.__compatible_ops__[InferenceBuilder.NAME]:
-    pytest.skip("This op had not been implemented on this system.", allow_module_level=True)

 rocm_version = OpBuilder.installed_rocm_version()
 if rocm_version != (0, 0):
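The guard deleted here (and from test_he_llama.py and test_he_lora.py below) is pytest's module-level skip pattern. A minimal sketch of how it behaves, with a hypothetical HAS_KERNELS flag standing in for the real deepspeed.ops.__compatible_ops__ lookup:

```python
# Sketch of a module-level pytest skip. HAS_KERNELS is a hypothetical stand-in
# for deepspeed.ops.__compatible_ops__[InferenceBuilder.NAME].
import pytest

HAS_KERNELS = False  # imagine this was detected at import time

if not HAS_KERNELS:
    # allow_module_level=True permits calling pytest.skip() outside a test
    # function; every test in this module is then skipped at collection time.
    pytest.skip("Inference kernels are not available on this system.", allow_module_level=True)

def test_never_collected():
    assert True  # skipped along with the rest of the module
```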
4 changes: 0 additions & 4 deletions tests/unit/hybrid_engine/test_he_llama.py
@@ -12,10 +12,6 @@
 from deepspeed.accelerator import get_accelerator

 from transformers import (AutoConfig, AutoTokenizer, AutoModelForCausalLM)
-from deepspeed.ops.op_builder import InferenceBuilder
-
-if not deepspeed.ops.__compatible_ops__[InferenceBuilder.NAME]:
-    pytest.skip("This op had not been implemented on this system.", allow_module_level=True)

 rocm_version = OpBuilder.installed_rocm_version()
 if rocm_version != (0, 0):
4 changes: 0 additions & 4 deletions tests/unit/hybrid_engine/test_he_lora.py
@@ -14,10 +14,6 @@
 from deepspeed.utils import safe_get_full_grad
 import numpy.testing as npt
 from unit.common import DistributedTest
-from deepspeed.ops.op_builder import InferenceBuilder
-
-if not deepspeed.ops.__compatible_ops__[InferenceBuilder.NAME]:
-    pytest.skip("This op had not been implemented on this system.", allow_module_level=True)

 from transformers import (AutoConfig, AutoTokenizer, AutoModelForCausalLM)

18 changes: 4 additions & 14 deletions tests/unit/inference/test_inference.py
@@ -22,6 +22,9 @@
 from deepspeed.accelerator import get_accelerator
 from deepspeed.ops.op_builder import InferenceBuilder

+if not deepspeed.ops.__compatible_ops__[InferenceBuilder.NAME]:
+    pytest.skip("This op had not been implemented on this system.", allow_module_level=True)
+
 rocm_version = OpBuilder.installed_rocm_version()
 if rocm_version != (0, 0):
     pytest.skip("skip inference tests on rocm for now", allow_module_level=True)
@@ -362,9 +365,6 @@ def test(
         if invalid_test_msg:
             pytest.skip(invalid_test_msg)

-        if not deepspeed.ops.__compatible_ops__[InferenceBuilder.NAME]:
-            pytest.skip("This op had not been implemented on this system.", allow_module_level=True)
-
         model, task = model_w_task
         local_rank = int(os.getenv("LOCAL_RANK", "0"))

@@ -401,9 +401,6 @@ def test(
     ):
         model, task = model_w_task
         dtype = torch.float16
-        if dtype not in get_accelerator().supported_dtypes():
-            pytest.skip(f"Acceleraor {get_accelerator().device_name()} does not support {dtype}.")
-
         local_rank = int(os.getenv("LOCAL_RANK", "0"))

         pipe = pipeline(task, model=model, model_kwargs={"low_cpu_mem_usage": True}, device=local_rank, framework="pt")
@@ -517,7 +514,7 @@ def test(
     [("Helsinki-NLP/opus-mt-en-de", "translation"), ("Salesforce/codegen-350M-mono", "text-generation")],
     ids=["marian", "codegen"], #codegen has fusedqkv weight.
 )
-@pytest.mark.parametrize("dtype", [torch.float16, torch.bfloat16], ids=["fp16", "bf16"])
+@pytest.mark.parametrize("dtype", [torch.float16], ids=["fp16"])
 class TestAutoTensorParallelism(DistributedTest):
     world_size = [2]

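For reference, the ids argument in the decorator above only renames the generated test ids; a minimal self-contained sketch of the same pattern:

```python
# Sketch of parametrize with readable ids, mirroring the decorator above:
# the id becomes e.g. test_dtype_roundtrip[fp16] in pytest's output.
import pytest
import torch

@pytest.mark.parametrize("dtype", [torch.float16], ids=["fp16"])
def test_dtype_roundtrip(dtype):
    t = torch.zeros(2, dtype=dtype)
    assert t.dtype == dtype
```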
@@ -533,13 +530,6 @@ def test(
         if invalid_test_msg:
             pytest.skip(invalid_test_msg)

-        if dtype not in get_accelerator().supported_dtypes():
-            pytest.skip(f"Acceleraor {get_accelerator().device_name()} does not support {dtype}.")
-
-        # TODO: enable this test after torch 2.1 stable release
-        if dtype == torch.bfloat16 and model_w_task[0] == "Salesforce/codegen-350M-mono":
-            pytest.skip("Codegen model(bf16) need to use torch version > 2.0.")
-
         model, task = model_w_task
         local_rank = int(os.getenv("LOCAL_RANK", "0"))
         world_size = int(os.getenv("WORLD_SIZE", "2"))
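The per-test dtype guard removed in this last hunk could also be factored into a helper. A sketch assuming only the get_accelerator() API visible in the diff (supported_dtypes() and device_name() both appear in the original code; skip_unless_supported is a hypothetical name):

```python
# Helper form of the dtype guard deleted above. skip_unless_supported is a
# hypothetical name; supported_dtypes() and device_name() come from the
# accelerator object used in the original tests.
import pytest
import torch
from deepspeed.accelerator import get_accelerator

def skip_unless_supported(dtype: torch.dtype) -> None:
    # Skip the calling test when the active accelerator cannot run this dtype.
    if dtype not in get_accelerator().supported_dtypes():
        pytest.skip(f"Accelerator {get_accelerator().device_name()} does not support {dtype}.")
```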