Various test fixes for flaky tests #2110

Merged · 8 commits · Dec 2, 2024
6 changes: 3 additions & 3 deletions cicd/cicd.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 set -e
 
-pytest -n8 --ignore=tests/e2e/ /workspace/axolotl/tests/
-pytest -n1 --dist loadfile -v /workspace/axolotl/tests/e2e/patched/ /workspace/axolotl/tests/e2e/integrations/
-pytest --ignore=tests/e2e/patched/ --ignore=tests/e2e/multigpu/ --ignore=tests/e2e/integrations/ /workspace/axolotl/tests/e2e/
+pytest --durations=10 -n8 --ignore=tests/e2e/ /workspace/axolotl/tests/
+pytest --durations=10 -n1 --dist loadfile -v /workspace/axolotl/tests/e2e/patched/ /workspace/axolotl/tests/e2e/integrations/
+pytest --durations=10 --ignore=tests/e2e/patched/ --ignore=tests/e2e/multigpu/ --ignore=tests/e2e/integrations/ /workspace/axolotl/tests/e2e/
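The only functional change here is `--durations=10`, which makes pytest print the ten slowest tests (including setup and teardown phases) at the end of each run, so slow or hanging flaky tests show up directly in CI logs. A sketch of reproducing the first invocation from Python, assuming pytest and pytest-xdist (which provides `-n8`) are installed:

# a sketch, assuming pytest and pytest-xdist are installed; pytest.main
# accepts the same flags as the shell invocation above
import pytest

exit_code = pytest.main(
    ["--durations=10", "-n8", "--ignore=tests/e2e/", "/workspace/axolotl/tests/"]
)
raise SystemExit(int(exit_code))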
2 changes: 1 addition & 1 deletion requirements.txt
@@ -26,7 +26,7 @@ numpy>=1.24.4,<=2.0.1
 evaluate==0.4.1
 scipy
 scikit-learn==1.4.2
-pynvml
+nvidia-ml-py==12.560.30
 art
 gradio==3.50.2
 tensorboard
23 changes: 18 additions & 5 deletions src/axolotl/utils/bench.py
@@ -1,13 +1,24 @@
"""Benchmarking and measurement utilities"""
import functools

import pynvml
import torch
from pynvml.nvml import NVMLError
from transformers.utils.import_utils import is_torch_npu_available

from axolotl.utils.distributed import get_device_type

try:
from pynvml import (
NVMLError,
nvmlDeviceGetHandleByIndex,
nvmlDeviceGetMemoryInfo,
nvmlInit,
)
except ImportError:
NVMLError = None
nvmlDeviceGetHandleByIndex = None
nvmlDeviceGetMemoryInfo = None
nvmlInit = None


def check_cuda_device(default_value):
"""
@@ -68,10 +79,12 @@ def gpu_memory_usage_smi(device=0):
         device = device.index
     if isinstance(device, str) and device.startswith("cuda:"):
         device = int(device[5:])
+    if not nvmlInit:
+        return 0.0
     try:
-        pynvml.nvmlInit()
-        handle = pynvml.nvmlDeviceGetHandleByIndex(device)
-        info = pynvml.nvmlDeviceGetMemoryInfo(handle)
+        nvmlInit()
+        handle = nvmlDeviceGetHandleByIndex(device)
+        info = nvmlDeviceGetMemoryInfo(handle)
         return info.used / 1024.0**3
     except NVMLError:
         return 0.0
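Paired with the requirements.txt change above, this swaps the third-party `pynvml` wrapper for NVIDIA's maintained `nvidia-ml-py` bindings (which install the same importable `pynvml` module) and guards the import so hosts without the bindings degrade to reporting 0.0 instead of crashing at import time. A quick sanity check of the pinned bindings, a sketch assuming an NVIDIA GPU is visible at index 0:

# a sketch, assuming nvidia-ml-py is installed and GPU 0 exists
from pynvml import (
    nvmlDeviceGetHandleByIndex,
    nvmlDeviceGetMemoryInfo,
    nvmlInit,
    nvmlShutdown,
)

nvmlInit()
info = nvmlDeviceGetMemoryInfo(nvmlDeviceGetHandleByIndex(0))
print(f"GPU 0 used memory: {info.used / 1024.0**3:.2f} GiB")
nvmlShutdown()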
2 changes: 1 addition & 1 deletion src/axolotl/utils/data/sft.py
@@ -179,7 +179,7 @@ def load_tokenized_prepared_datasets(
                 + "|".join(
                     sorted(
                         [
-                            f"{d.path}: {d.type}: {d.shards}: {d.conversation}{d.split}"
+                            f"{d.path}:{d.type}:{d.shards}:{d.conversation}{d.split}"
                             for d in cfg_datasets
                         ]
                     )
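Removing the stray spaces normalizes the per-dataset spec string that axolotl sorts, joins, and hashes to key its prepared-dataset cache, so configs listing the same datasets in a different order resolve to the same cache entry. A hypothetical reconstruction of that fingerprinting step (the helper name and the `md5` call are assumptions; only the joined spec appears in this hunk):

import hashlib

def dataset_fingerprint(cfg_datasets) -> str:
    # hypothetical helper: sorting makes the fingerprint independent of the
    # order datasets appear in the config; the joined spec then feeds a hash
    spec = "|".join(
        sorted(
            f"{d.path}:{d.type}:{d.shards}:{d.conversation}{d.split}"
            for d in cfg_datasets
        )
    )
    return hashlib.md5(spec.encode("utf-8")).hexdigest()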
20 changes: 17 additions & 3 deletions tests/conftest.py
@@ -14,6 +14,12 @@ def download_smollm2_135m_model():
     snapshot_download("HuggingFaceTB/SmolLM2-135M")
 
 
+@pytest.fixture(scope="session", autouse=True)
+def download_llama_68m_random_model():
+    # download the model
+    snapshot_download("JackFram/llama-68m")
+
+
 @pytest.fixture(scope="session", autouse=True)
 def download_qwen_2_5_half_billion_model():
     # download the model
@@ -22,18 +28,26 @@ def download_qwen_2_5_half_billion_model():
 
 
 @pytest.fixture(scope="session", autouse=True)
 def download_tatsu_lab_alpaca_dataset():
-    # download the model
+    # download the dataset
     snapshot_download("tatsu-lab/alpaca", repo_type="dataset")
 
 
 @pytest.fixture(scope="session", autouse=True)
 def download_mhenrichsen_alpaca_2k_dataset():
-    # download the model
+    # download the dataset
     snapshot_download("mhenrichsen/alpaca_2k_test", repo_type="dataset")
 
 
+@pytest.fixture(scope="session", autouse=True)
+def download_mhenrichsen_alpaca_2k_w_revision_dataset():
+    # download the dataset
+    snapshot_download(
+        "mhenrichsen/alpaca_2k_test", repo_type="dataset", revision="d05c1cb"
+    )
+
+
 def download_mlabonne_finetome_100k_dataset():
-    # download the model
+    # download the dataset
     snapshot_download("mlabonne/FineTome-100k", repo_type="dataset")


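These session-scoped, autouse fixtures warm the Hugging Face cache once per test session, and the `revision="d05c1cb"` pin makes the alpaca_2k download deterministic even if the upstream dataset is edited later. (`download_mlabonne_finetome_100k_dataset` is still missing a `@pytest.fixture` decorator, so it only runs if called explicitly.) A sketch of consuming the pinned snapshot, assuming the `datasets` library is available:

# loads the same pinned revision the fixture pre-downloads; a sketch,
# not taken from the test suite itself
from datasets import load_dataset

ds = load_dataset("mhenrichsen/alpaca_2k_test", revision="d05c1cb", split="train")
print(len(ds))  # the dataset name suggests roughly 2k rows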
7 changes: 4 additions & 3 deletions tests/core/test_trainer_builder.py
@@ -14,9 +14,7 @@
 def fixture_cfg():
     cfg = DictDefault(
         {
-            "base_model": "TinyLlama/TinyLlama-1.1B-Chat-v0.6",
-            "model_type": "AutoModelForCausalLM",
-            "tokenizer_type": "LlamaTokenizer",
+            "base_model": "HuggingFaceTB/SmolLM2-135M",
             "micro_batch_size": 1,
             "gradient_accumulation_steps": 1,
             "learning_rate": 0.00005,
@@ -33,6 +31,9 @@ def fixture_cfg():
             "dataloader_num_workers": 1,
             "dataloader_pin_memory": True,
             "model_config_type": "llama",
+            "special_tokens": {
+                "pad_token": "<|endoftext|>",
+            },
         }
     )
 
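Like several tests in this PR, the fixture moves from TinyLlama to the much smaller HuggingFaceTB/SmolLM2-135M, which appears to ship without a pad token, hence the explicit `special_tokens` entry. A minimal sketch of the tokenizer-level equivalent, assuming SmolLM2's tokenizer defines no pad token by default:

# a sketch of what the special_tokens config accomplishes
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM2-135M")
if tok.pad_token is None:
    tok.add_special_tokens({"pad_token": "<|endoftext|>"})
print(tok.pad_token)  # <|endoftext|>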
6 changes: 3 additions & 3 deletions tests/e2e/patched/test_fa_xentropy.py
@@ -51,11 +51,11 @@ def test_lora_packing_fa_cross_entropy(self, temp_dir, gradient_accumulation_steps):
             "flash_attn_cross_entropy": True,
             "load_in_8bit": True,
             "adapter": "lora",
-            "lora_r": 32,
-            "lora_alpha": 64,
+            "lora_r": 8,
+            "lora_alpha": 16,
             "lora_dropout": 0.05,
             "lora_target_linear": True,
-            "val_set_size": 0.2,
+            "val_set_size": 0.05,
             "special_tokens": {
                 "pad_token": "<|endoftext|>",
             },
29 changes: 15 additions & 14 deletions tests/e2e/patched/test_resume.py
@@ -29,23 +29,24 @@ class TestResumeLlama(unittest.TestCase):
     """
 
     @with_temp_dir
-    def test_resume_qlora_packed(self, temp_dir):
+    def test_resume_lora_packed(self, temp_dir):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
-                "tokenizer_type": "LlamaTokenizer",
+                "base_model": "HuggingFaceTB/SmolLM2-135M",
                 "sequence_len": 1024,
                 "sample_packing": True,
                 "flash_attention": True,
-                "load_in_4bit": True,
-                "adapter": "qlora",
-                "lora_r": 32,
-                "lora_alpha": 64,
+                "load_in_8bit": True,
+                "adapter": "lora",
+                "lora_r": 8,
+                "lora_alpha": 16,
                 "lora_dropout": 0.05,
                 "lora_target_linear": True,
-                "val_set_size": 0.1,
-                "special_tokens": {},
+                "val_set_size": 0.01,
+                "special_tokens": {
+                    "pad_token": "<|endoftext|>",
+                },
                 "datasets": [
                     {
                         "path": "vicgalle/alpaca-gpt4",
@@ -57,11 +58,11 @@ def test_resume_qlora_packed(self, temp_dir):
                 "gradient_accumulation_steps": 1,
                 "output_dir": temp_dir,
                 "learning_rate": 0.00001,
-                "optimizer": "adamw_torch",
+                "optimizer": "adamw_8bit",
                 "lr_scheduler": "cosine",
-                "save_steps": 10,
+                "save_steps": 3,
                 "save_total_limit": 5,
-                "max_steps": 40,
+                "max_steps": 15,
                 "use_tensorboard": True,
             }
         )
@@ -77,7 +78,7 @@ def test_resume_qlora_packed(self, temp_dir):
 
         resume_cfg = cfg | DictDefault(
             {
-                "resume_from_checkpoint": f"{temp_dir}/checkpoint-30/",
+                "resume_from_checkpoint": f"{temp_dir}/checkpoint-9/",
             }
         )
         normalize_config(resume_cfg)
@@ -93,4 +94,4 @@
         )
         pattern = r"first_step\s+(\d+)"
         first_steps = int(re.findall(pattern, res.stdout)[0])
-        assert first_steps == 31
+        assert first_steps == 10
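The updated assertion follows from the new hyperparameters: with `save_steps: 3` and `max_steps: 15`, checkpoints land at steps 3, 6, 9, 12, and 15, so resuming from `checkpoint-9` should log step 10 as the first post-resume step. A worked check of that arithmetic:

# the arithmetic behind the assertion above
save_steps, max_steps = 3, 15
checkpoints = list(range(save_steps, max_steps + 1, save_steps))
assert checkpoints == [3, 6, 9, 12, 15]  # save_total_limit=5 keeps all five

resume_from = 9
assert resume_from + 1 == 10  # first logged step after resuming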
6 changes: 3 additions & 3 deletions tests/e2e/patched/test_unsloth_qlora.py
@@ -42,7 +42,7 @@ def test_unsloth_llama_qlora_fa2(self, temp_dir, sample_packing):
                 "lora_alpha": 16,
                 "lora_dropout": 0.05,
                 "lora_target_linear": True,
-                "val_set_size": 0.2,
+                "val_set_size": 0.05,
                 "special_tokens": {
                     "pad_token": "<|endoftext|>",
                 },
@@ -92,7 +92,7 @@ def test_unsloth_llama_qlora_unpacked(self, temp_dir):
                 "lora_alpha": 16,
                 "lora_dropout": 0.05,
                 "lora_target_linear": True,
-                "val_set_size": 0.2,
+                "val_set_size": 0.05,
                 "special_tokens": {
                     "pad_token": "<|endoftext|>",
                 },
@@ -146,7 +146,7 @@ def test_unsloth_llama_qlora_unpacked_no_fa2_fp16(self, temp_dir, sdp_attention):
                 "lora_alpha": 16,
                 "lora_dropout": 0.05,
                 "lora_target_linear": True,
-                "val_set_size": 0.2,
+                "val_set_size": 0.05,
                 "special_tokens": {
                     "pad_token": "<|endoftext|>",
                 },
6 changes: 4 additions & 2 deletions tests/e2e/test_optimizers.py
@@ -94,6 +94,7 @@ def test_adopt_adamw(self, temp_dir):
                     },
                 ],
                 "num_epochs": 1,
+                "max_steps": 5,
                 "micro_batch_size": 8,
                 "gradient_accumulation_steps": 1,
                 "output_dir": temp_dir,
@@ -115,7 +116,7 @@ def test_fft_schedule_free_adamw(self, temp_dir):
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
                 "sequence_len": 1024,
-                "val_set_size": 0.1,
+                "val_set_size": 0.01,
                 "special_tokens": {
                     "pad_token": "<|endoftext|>",
                 },
@@ -126,13 +127,14 @@ def test_fft_schedule_free_adamw(self, temp_dir):
                     },
                 ],
                 "num_epochs": 1,
-                "micro_batch_size": 4,
+                "micro_batch_size": 2,
                 "gradient_accumulation_steps": 2,
                 "output_dir": temp_dir,
                 "learning_rate": 0.00001,
                 "optimizer": "schedule_free_adamw",
                 "lr_scheduler": "constant",
                 "save_safetensors": True,
+                "max_steps": 10,
             }
         )
         # pylint: disable=duplicate-code
1 change: 1 addition & 0 deletions tests/e2e/test_relora_llama.py
@@ -52,6 +52,7 @@ def test_relora(self, temp_dir):
                 ],
                 "warmup_steps": 15,
                 "num_epochs": 2,
+                "max_steps": 51,  # at least 2x relora_steps
                 "micro_batch_size": 4,
                 "gradient_accumulation_steps": 1,
                 "output_dir": temp_dir,
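ReLoRA periodically merges the LoRA adapter into the base weights and restarts it, so the test needs enough optimizer steps to cover at least two of those restart cycles, which is what the inline comment pins down. A hedged sketch of the constraint (the `relora_steps` value is an assumption inferred from the "2x" hint; the real value lives elsewhere in this test's config):

# relora_steps = 25 is an assumption, not shown in this hunk
relora_steps = 25
max_steps = 51
assert max_steps >= 2 * relora_steps  # at least two merge-and-restart cycles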
4 changes: 2 additions & 2 deletions tests/test_datasets.py
@@ -67,7 +67,7 @@ def test_load_hub(self):
     def test_load_local_hub(self):
         """Niche use case. Verify that a local copy of a hub dataset can be loaded"""
         with tempfile.TemporaryDirectory() as tmp_dir:
-            tmp_ds_path = Path("mhenrichsen/alpaca_2k_test")
+            tmp_ds_path = Path(tmp_dir) / "mhenrichsen/alpaca_2k_test"
             tmp_ds_path.mkdir(parents=True, exist_ok=True)
             snapshot_download(
                 repo_id="mhenrichsen/alpaca_2k_test",
@@ -89,7 +89,7 @@ def test_load_local_hub(self):
                             "ds_type": "parquet",
                             "type": "alpaca",
                             "data_files": [
-                                "mhenrichsen/alpaca_2k_test/alpaca_2000.parquet",
+                                f"{tmp_ds_path}/alpaca_2000.parquet",
                             ],
                         },
                     ],
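The original test wrote the hub snapshot to a path relative to the current working directory, so parallel workers (`pytest -n8` in cicd.sh) could race on the same files and leave debris behind; anchoring the path under the `TemporaryDirectory` isolates each run and guarantees cleanup. A condensed sketch of the corrected pattern (the `local_dir` argument is an assumption; those lines are collapsed in the diff):

import tempfile
from pathlib import Path

from huggingface_hub import snapshot_download

with tempfile.TemporaryDirectory() as tmp_dir:
    # snapshot lives under the temp dir: auto-cleaned, no cross-test collisions
    tmp_ds_path = Path(tmp_dir) / "mhenrichsen/alpaca_2k_test"
    tmp_ds_path.mkdir(parents=True, exist_ok=True)
    snapshot_download(
        repo_id="mhenrichsen/alpaca_2k_test",
        repo_type="dataset",
        local_dir=tmp_ds_path,
    )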
10 changes: 6 additions & 4 deletions tests/test_perplexity.py
@@ -7,7 +7,7 @@
 
 from axolotl.utils.callbacks.perplexity import Perplexity
 
-MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
+MODEL_NAME = "HuggingFaceTB/SmolLM2-135M"
 
 
 @fixture()
@@ -22,7 +22,9 @@ def model():
 
 @fixture()
 def tokenizer():
-    return AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
+    tokenizer_ = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
+    tokenizer_.add_special_tokens({"pad_token": "<|endoftext|>"})
+    return tokenizer_
 
 
 def test_perplexity_longer_than_stride(model, metric):
@@ -33,12 +35,12 @@
     """
     result = metric.compute(model, [sample_text])
     ppl = result["score"]
-    assert round(ppl, 2) == 5.37
+    assert round(ppl, 2) == 7.41
 
 
 def test_perplexity_short(model, metric):
     # taken from https://huggingface.co/datasets/roneneldan/TinyStories
     sample_text = "Once upon a time, there was a little car named Beep. Beep loved to go fast and play in the sun."
     result = metric.compute(model, [sample_text])
     ppl = result["score"]
-    assert round(ppl, 2) == 10.02
+    assert round(ppl, 2) == 10.33
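The reference perplexities are model-specific, so swapping TinyLlama for SmolLM2-135M means recomputing them. A rough sketch of how such reference values can be regenerated with plain transformers (axolotl's `Perplexity` metric uses a strided evaluation, so exact numbers may differ):

# rough single-pass perplexity, not axolotl's strided metric; useful for
# ballparking new reference values after a model swap
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "HuggingFaceTB/SmolLM2-135M"
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

text = "Once upon a time, there was a little car named Beep."
enc = tokenizer(text, return_tensors="pt")
with torch.no_grad():
    loss = model(**enc, labels=enc["input_ids"]).loss  # mean token NLL
print(round(torch.exp(loss).item(), 2))  # perplexity = exp(mean NLL)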