From 27a921a4c1ba1cfcaf4d4dc533aa218c3996804f Mon Sep 17 00:00:00 2001 From: Ibrahim Hadzic Date: Thu, 28 Nov 2024 15:24:24 -0500 Subject: [PATCH 01/51] test: Add unit tests for LighterSystem functionality and behavior --- tests/unit/test_system.py | 139 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 139 insertions(+) create mode 100644 tests/unit/test_system.py diff --git a/tests/unit/test_system.py b/tests/unit/test_system.py new file mode 100644 index 00000000..8a088fa7 --- /dev/null +++ b/tests/unit/test_system.py @@ -0,0 +1,139 @@ +import pytest +import torch +import torch.nn as nn +from torch.utils.data import Dataset, DataLoader +from torch.optim import Adam +from torch.optim.lr_scheduler import StepLR +from torchmetrics import Accuracy + +from lighter.system import LighterSystem + +class DummyDataset(Dataset): + def __init__(self, size=100): + self.size = size + + def __len__(self): + return self.size + + def __getitem__(self, idx): + x = torch.randn(3, 32, 32) + + y = torch.randint(0, 10, size=()).long() # Changed to return scalar tensor + return {"input": x, "target": y} + +class DummyModel(nn.Module): + def __init__(self): + super().__init__() + self.net = nn.Sequential( + nn.Flatten(), + nn.Linear(3072, 10) + ) + + def forward(self, x): + return self.net(x) + +@pytest.fixture +def basic_system(): + model = DummyModel() + optimizer = Adam(model.parameters(), lr=0.001) + scheduler = StepLR(optimizer, step_size=1) + criterion = nn.CrossEntropyLoss() + + datasets = { + "train": DummyDataset(), + "val": DummyDataset(50), + "test": DummyDataset(20) + } + + metrics = { + "train": Accuracy(task="multiclass", num_classes=10), + "val": Accuracy(task="multiclass", num_classes=10), + "test": Accuracy(task="multiclass", num_classes=10) + } + + return LighterSystem( + model=model, + batch_size=32, + optimizer=optimizer, + scheduler=scheduler, + criterion=criterion, + datasets=datasets, + metrics=metrics + ) + +def test_system_initialization(basic_system): + assert isinstance(basic_system.model, DummyModel) + assert basic_system.batch_size == 32 + assert isinstance(basic_system.optimizer, Adam) + assert isinstance(basic_system.scheduler, StepLR) + +def test_configure_optimizers(basic_system): + config = basic_system.configure_optimizers() + assert "optimizer" in config + assert "lr_scheduler" in config + assert isinstance(config["optimizer"], Adam) + assert isinstance(config["lr_scheduler"], StepLR) + +def test_dataloader_creation(basic_system): + basic_system.setup("fit") + train_loader = basic_system.train_dataloader() + assert isinstance(train_loader, DataLoader) + assert train_loader.batch_size == 32 + +@pytest.mark.skip(reason="Requires trainer attachment") +def test_training_step(basic_system): + basic_system.setup("fit") + batch = next(iter(basic_system.train_dataloader())) + result = basic_system._base_step(batch, batch_idx=0, mode="train") + + assert "loss" in result + assert "metrics" in result + assert "input" in result + assert "target" in result + assert "pred" in result + assert torch.is_tensor(result["loss"]) + +@pytest.mark.skip(reason="Requires trainer attachment") +def test_validation_step(basic_system): + basic_system.setup("validate") + batch = next(iter(basic_system.val_dataloader())) + result = basic_system._base_step(batch, batch_idx=0, mode="val") + + assert "loss" in result + assert "metrics" in result + assert torch.is_tensor(result["loss"]) + +def test_predict_step(basic_system): + basic_system.setup("predict") + batch = {"input": torch.randn(1, 3, 32, 32)} + result = basic_system._base_step(batch, batch_idx=0, mode="predict") + + assert "pred" in result + assert torch.is_tensor(result["pred"]) + +def test_learning_rate_property(basic_system): + initial_lr = basic_system.learning_rate + assert initial_lr == 0.001 + + basic_system.learning_rate = 0.01 + assert basic_system.learning_rate == 0.01 + +@pytest.mark.parametrize("batch", [ + + + + {"input": torch.randn(1, 3, 32, 32), "target": torch.randint(0, 10, size=()).long()}, + {"input": torch.randn(1, 3, 32, 32), "target": torch.randint(0, 10, size=()).long()}, + {"input": torch.randn(1, 3, 32, 32), "target": torch.randint(0, 10, size=()).long(), "id": "test_id"}, +]) +@pytest.mark.skip(reason="Requires trainer attachment") +def test_valid_batch_formats(basic_system, batch): + basic_system.setup("fit") + result = basic_system._base_step(batch, batch_idx=0, mode="train") + assert isinstance(result, dict) + +@pytest.mark.xfail(raises=ValueError) +def test_invalid_batch_format(basic_system): + basic_system.setup("fit") + invalid_batch = {"wrong_key": torch.randn(1, 3, 32, 32)} + basic_system._base_step(invalid_batch, batch_idx=0, mode="train") From e3a784f997bb85f04eeb66dd99e950ab08eff961 Mon Sep 17 00:00:00 2001 From: "Ibrahim Hadzic (aider)" Date: Thu, 28 Nov 2024 15:24:25 -0500 Subject: [PATCH 02/51] test: Add comprehensive unit tests for all modules in the lighter package --- tests/unit/test_collate.py | 8 ++++++++ tests/unit/test_dynamic_imports.py | 6 ++++++ tests/unit/test_freezer.py | 20 ++++++++++++++++++++ tests/unit/test_logging.py | 6 ++++++ tests/unit/test_misc.py | 6 ++++++ tests/unit/test_model.py | 13 +++++++++++++ tests/unit/test_patches.py | 8 ++++++++ tests/unit/test_runner.py | 6 ++++++ tests/unit/test_system.py | 12 ++++++++++++ tests/unit/test_utils.py | 13 +++++++++++++ tests/unit/test_writer_base.py | 6 ++++++ tests/unit/test_writer_file.py | 13 +++++++++++++ tests/unit/test_writer_table.py | 11 +++++++++++ 13 files changed, 128 insertions(+) create mode 100644 tests/unit/test_collate.py create mode 100644 tests/unit/test_dynamic_imports.py create mode 100644 tests/unit/test_freezer.py create mode 100644 tests/unit/test_logging.py create mode 100644 tests/unit/test_misc.py create mode 100644 tests/unit/test_model.py create mode 100644 tests/unit/test_patches.py create mode 100644 tests/unit/test_runner.py create mode 100644 tests/unit/test_utils.py create mode 100644 tests/unit/test_writer_base.py create mode 100644 tests/unit/test_writer_file.py create mode 100644 tests/unit/test_writer_table.py diff --git a/tests/unit/test_collate.py b/tests/unit/test_collate.py new file mode 100644 index 00000000..f53a958f --- /dev/null +++ b/tests/unit/test_collate.py @@ -0,0 +1,8 @@ +import pytest +from lighter.utils.collate import collate_replace_corrupted + +def test_collate_replace_corrupted(): + batch = [1, None, 2, None, 3] + dataset = [1, 2, 3, 4, 5] + collated_batch = collate_replace_corrupted(batch, dataset) + assert len(collated_batch) == 5 diff --git a/tests/unit/test_dynamic_imports.py b/tests/unit/test_dynamic_imports.py new file mode 100644 index 00000000..c4350215 --- /dev/null +++ b/tests/unit/test_dynamic_imports.py @@ -0,0 +1,6 @@ +import pytest +from lighter.utils.dynamic_imports import import_module_from_path + +def test_import_module_from_path(): + with pytest.raises(FileNotFoundError): + import_module_from_path("non_existent_module", "non_existent_path") diff --git a/tests/unit/test_freezer.py b/tests/unit/test_freezer.py new file mode 100644 index 00000000..0fdd507c --- /dev/null +++ b/tests/unit/test_freezer.py @@ -0,0 +1,20 @@ +import pytest +from lighter.callbacks.freezer import LighterFreezer +from torch.nn import Module + +class DummyModel(Module): + def __init__(self): + super().__init__() + self.layer1 = torch.nn.Linear(10, 10) + self.layer2 = torch.nn.Linear(10, 10) + +def test_freezer_initialization(): + freezer = LighterFreezer(names=["layer1"]) + assert freezer.names == ["layer1"] + +def test_freezer_freezing(): + model = DummyModel() + freezer = LighterFreezer(names=["layer1"]) + freezer._set_model_requires_grad(model, False) + assert not model.layer1.weight.requires_grad + assert model.layer2.weight.requires_grad diff --git a/tests/unit/test_logging.py b/tests/unit/test_logging.py new file mode 100644 index 00000000..3345affa --- /dev/null +++ b/tests/unit/test_logging.py @@ -0,0 +1,6 @@ +import pytest +from lighter.utils.logging import _setup_logging + +def test_setup_logging(): + _setup_logging() + assert True # Just ensure no exceptions are raised diff --git a/tests/unit/test_misc.py b/tests/unit/test_misc.py new file mode 100644 index 00000000..693a6d87 --- /dev/null +++ b/tests/unit/test_misc.py @@ -0,0 +1,6 @@ +import pytest +from lighter.utils.misc import ensure_list + +def test_ensure_list(): + assert ensure_list(1) == [1] + assert ensure_list([1, 2]) == [1, 2] diff --git a/tests/unit/test_model.py b/tests/unit/test_model.py new file mode 100644 index 00000000..19319e91 --- /dev/null +++ b/tests/unit/test_model.py @@ -0,0 +1,13 @@ +import pytest +from lighter.utils.model import replace_layer_with_identity +from torch.nn import Linear, Module + +class DummyModel(Module): + def __init__(self): + super().__init__() + self.fc = Linear(10, 10) + +def test_replace_layer_with_identity(): + model = DummyModel() + replace_layer_with_identity(model, "fc") + assert isinstance(model.fc, torch.nn.Identity) diff --git a/tests/unit/test_patches.py b/tests/unit/test_patches.py new file mode 100644 index 00000000..07fc7fd8 --- /dev/null +++ b/tests/unit/test_patches.py @@ -0,0 +1,8 @@ +import pytest +from lighter.utils.patches import PatchedModuleDict +from torch.nn import Linear + +def test_patched_module_dict(): + modules = {"layer1": Linear(10, 10)} + patched_dict = PatchedModuleDict(modules) + assert "layer1" in patched_dict diff --git a/tests/unit/test_runner.py b/tests/unit/test_runner.py new file mode 100644 index 00000000..5cb09cc7 --- /dev/null +++ b/tests/unit/test_runner.py @@ -0,0 +1,6 @@ +import pytest +from lighter.utils.runner import parse_config + +def test_parse_config_no_config(): + with pytest.raises(ValueError): + parse_config() diff --git a/tests/unit/test_system.py b/tests/unit/test_system.py index 8a088fa7..3bb4798a 100644 --- a/tests/unit/test_system.py +++ b/tests/unit/test_system.py @@ -137,3 +137,15 @@ def test_invalid_batch_format(basic_system): basic_system.setup("fit") invalid_batch = {"wrong_key": torch.randn(1, 3, 32, 32)} basic_system._base_step(invalid_batch, batch_idx=0, mode="train") +import pytest +from lighter.system import LighterSystem +from torch.nn import Module + +class DummyModel(Module): + def __init__(self): + super().__init__() + +def test_system_initialization(): + model = DummyModel() + system = LighterSystem(model=model, batch_size=32) + assert system.batch_size == 32 diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py new file mode 100644 index 00000000..421afb35 --- /dev/null +++ b/tests/unit/test_utils.py @@ -0,0 +1,13 @@ +import pytest +import torch +from lighter.callbacks.utils import preprocess_image + +def test_preprocess_image_2d(): + image = torch.rand(1, 3, 64, 64) + processed_image = preprocess_image(image) + assert processed_image.shape == (3, 64, 64) + +def test_preprocess_image_3d(): + image = torch.rand(1, 3, 4, 64, 64) + processed_image = preprocess_image(image) + assert processed_image.shape[1] == 3 diff --git a/tests/unit/test_writer_base.py b/tests/unit/test_writer_base.py new file mode 100644 index 00000000..62814a1d --- /dev/null +++ b/tests/unit/test_writer_base.py @@ -0,0 +1,6 @@ +import pytest +from lighter.callbacks.writer.base import LighterBaseWriter + +def test_writer_initialization(): + with pytest.raises(TypeError): + writer = LighterBaseWriter(path="test", writer="tensor") diff --git a/tests/unit/test_writer_file.py b/tests/unit/test_writer_file.py new file mode 100644 index 00000000..6e378b75 --- /dev/null +++ b/tests/unit/test_writer_file.py @@ -0,0 +1,13 @@ +import pytest +import torch +from lighter.callbacks.writer.file import LighterFileWriter + +def test_file_writer_initialization(): + writer = LighterFileWriter(path="test_dir", writer="tensor") + assert writer.path == "test_dir" + +def test_file_writer_write_tensor(): + writer = LighterFileWriter(path="test_dir", writer="tensor") + tensor = torch.tensor([1, 2, 3]) + writer.write(tensor, id=1) + assert (writer.path / "1.pt").exists() diff --git a/tests/unit/test_writer_table.py b/tests/unit/test_writer_table.py new file mode 100644 index 00000000..7b168647 --- /dev/null +++ b/tests/unit/test_writer_table.py @@ -0,0 +1,11 @@ +import pytest +from lighter.callbacks.writer.table import LighterTableWriter + +def test_table_writer_initialization(): + writer = LighterTableWriter(path="test.csv", writer="tensor") + assert writer.path == "test.csv" + +def test_table_writer_write(): + writer = LighterTableWriter(path="test.csv", writer="tensor") + writer.write(tensor=torch.tensor([1]), id=1) + assert len(writer.csv_records) == 1 From e272ce8415b57fd8ff62745554839f60971cf16b Mon Sep 17 00:00:00 2001 From: "Ibrahim Hadzic (aider)" Date: Thu, 28 Nov 2024 15:24:35 -0500 Subject: [PATCH 03/51] fix: Import torch in test files to resolve undefined name errors --- tests/unit/test_freezer.py | 1 + tests/unit/test_model.py | 1 + tests/unit/test_writer_table.py | 1 + 3 files changed, 3 insertions(+) diff --git a/tests/unit/test_freezer.py b/tests/unit/test_freezer.py index 0fdd507c..63715ae3 100644 --- a/tests/unit/test_freezer.py +++ b/tests/unit/test_freezer.py @@ -1,5 +1,6 @@ import pytest from lighter.callbacks.freezer import LighterFreezer +import torch from torch.nn import Module class DummyModel(Module): diff --git a/tests/unit/test_model.py b/tests/unit/test_model.py index 19319e91..1c36c47b 100644 --- a/tests/unit/test_model.py +++ b/tests/unit/test_model.py @@ -1,5 +1,6 @@ import pytest from lighter.utils.model import replace_layer_with_identity +import torch from torch.nn import Linear, Module class DummyModel(Module): diff --git a/tests/unit/test_writer_table.py b/tests/unit/test_writer_table.py index 7b168647..cd953e60 100644 --- a/tests/unit/test_writer_table.py +++ b/tests/unit/test_writer_table.py @@ -1,4 +1,5 @@ import pytest +import torch from lighter.callbacks.writer.table import LighterTableWriter def test_table_writer_initialization(): From 2eeab0087e62353c1f68639bb2d05470d469fd7e Mon Sep 17 00:00:00 2001 From: "Ibrahim Hadzic (aider)" Date: Thu, 28 Nov 2024 15:29:44 -0500 Subject: [PATCH 04/51] fix: Resolve optimizer empty parameter list and update test assertions --- tests/unit/test_freezer.py | 2 +- tests/unit/test_system.py | 3 ++- tests/unit/test_utils.py | 2 +- tests/unit/test_writer_file.py | 2 +- tests/unit/test_writer_table.py | 2 +- 5 files changed, 6 insertions(+), 5 deletions(-) diff --git a/tests/unit/test_freezer.py b/tests/unit/test_freezer.py index 63715ae3..9e75b473 100644 --- a/tests/unit/test_freezer.py +++ b/tests/unit/test_freezer.py @@ -16,6 +16,6 @@ def test_freezer_initialization(): def test_freezer_freezing(): model = DummyModel() freezer = LighterFreezer(names=["layer1"]) - freezer._set_model_requires_grad(model, False) + freezer._set_model_requires_grad(model, requires_grad=False) assert not model.layer1.weight.requires_grad assert model.layer2.weight.requires_grad diff --git a/tests/unit/test_system.py b/tests/unit/test_system.py index 3bb4798a..fb4093d5 100644 --- a/tests/unit/test_system.py +++ b/tests/unit/test_system.py @@ -141,9 +141,10 @@ def test_invalid_batch_format(basic_system): from lighter.system import LighterSystem from torch.nn import Module -class DummyModel(Module): +class DummyModel(nn.Module): def __init__(self): super().__init__() + self.layer = nn.Linear(10, 10) def test_system_initialization(): model = DummyModel() diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index 421afb35..4f90ff2b 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -10,4 +10,4 @@ def test_preprocess_image_2d(): def test_preprocess_image_3d(): image = torch.rand(1, 3, 4, 64, 64) processed_image = preprocess_image(image) - assert processed_image.shape[1] == 3 + assert processed_image.shape[0] == 3 diff --git a/tests/unit/test_writer_file.py b/tests/unit/test_writer_file.py index 6e378b75..672f2b6f 100644 --- a/tests/unit/test_writer_file.py +++ b/tests/unit/test_writer_file.py @@ -4,7 +4,7 @@ def test_file_writer_initialization(): writer = LighterFileWriter(path="test_dir", writer="tensor") - assert writer.path == "test_dir" + assert writer.path == Path("test_dir") def test_file_writer_write_tensor(): writer = LighterFileWriter(path="test_dir", writer="tensor") diff --git a/tests/unit/test_writer_table.py b/tests/unit/test_writer_table.py index cd953e60..3603648c 100644 --- a/tests/unit/test_writer_table.py +++ b/tests/unit/test_writer_table.py @@ -4,7 +4,7 @@ def test_table_writer_initialization(): writer = LighterTableWriter(path="test.csv", writer="tensor") - assert writer.path == "test.csv" + assert writer.path == Path("test.csv") def test_table_writer_write(): writer = LighterTableWriter(path="test.csv", writer="tensor") From ab4e153176da097b886fd088cf8a126ebd763944 Mon Sep 17 00:00:00 2001 From: "Ibrahim Hadzic (aider)" Date: Thu, 28 Nov 2024 15:30:24 -0500 Subject: [PATCH 05/51] fix: Import Path from pathlib to resolve undefined name errors in tests --- tests/unit/test_writer_file.py | 1 + tests/unit/test_writer_table.py | 1 + 2 files changed, 2 insertions(+) diff --git a/tests/unit/test_writer_file.py b/tests/unit/test_writer_file.py index 672f2b6f..b927224c 100644 --- a/tests/unit/test_writer_file.py +++ b/tests/unit/test_writer_file.py @@ -1,5 +1,6 @@ import pytest import torch +from pathlib import Path from lighter.callbacks.writer.file import LighterFileWriter def test_file_writer_initialization(): diff --git a/tests/unit/test_writer_table.py b/tests/unit/test_writer_table.py index 3603648c..7338bd29 100644 --- a/tests/unit/test_writer_table.py +++ b/tests/unit/test_writer_table.py @@ -1,5 +1,6 @@ import pytest import torch +from pathlib import Path from lighter.callbacks.writer.table import LighterTableWriter def test_table_writer_initialization(): From 9459cdf3851bed8129989ca62c3152c4bb485937 Mon Sep 17 00:00:00 2001 From: "Ibrahim Hadzic (aider)" Date: Thu, 28 Nov 2024 15:33:44 -0500 Subject: [PATCH 06/51] fix: Ensure layers are frozen correctly in LighterFreezer test and fix writer path error --- tests/unit/test_freezer.py | 1 + tests/unit/test_writer_file.py | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/unit/test_freezer.py b/tests/unit/test_freezer.py index 9e75b473..7a641197 100644 --- a/tests/unit/test_freezer.py +++ b/tests/unit/test_freezer.py @@ -17,5 +17,6 @@ def test_freezer_freezing(): model = DummyModel() freezer = LighterFreezer(names=["layer1"]) freezer._set_model_requires_grad(model, requires_grad=False) + freezer._frozen_state = True # Ensure the frozen state is set assert not model.layer1.weight.requires_grad assert model.layer2.weight.requires_grad diff --git a/tests/unit/test_writer_file.py b/tests/unit/test_writer_file.py index b927224c..aaa3683c 100644 --- a/tests/unit/test_writer_file.py +++ b/tests/unit/test_writer_file.py @@ -4,7 +4,9 @@ from lighter.callbacks.writer.file import LighterFileWriter def test_file_writer_initialization(): - writer = LighterFileWriter(path="test_dir", writer="tensor") + path = Path("test_dir") + path.mkdir(exist_ok=True) # Ensure the directory exists + writer = LighterFileWriter(path=path, writer="tensor") assert writer.path == Path("test_dir") def test_file_writer_write_tensor(): From 59d7df6ba7f536b9164cac6be9cf35a12606502d Mon Sep 17 00:00:00 2001 From: Ibrahim Hadzic Date: Fri, 29 Nov 2024 11:50:19 -0500 Subject: [PATCH 07/51] test: Remove unnecessary frozen state assignment in freezer test --- tests/unit/test_freezer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/unit/test_freezer.py b/tests/unit/test_freezer.py index 7a641197..9e75b473 100644 --- a/tests/unit/test_freezer.py +++ b/tests/unit/test_freezer.py @@ -17,6 +17,5 @@ def test_freezer_freezing(): model = DummyModel() freezer = LighterFreezer(names=["layer1"]) freezer._set_model_requires_grad(model, requires_grad=False) - freezer._frozen_state = True # Ensure the frozen state is set assert not model.layer1.weight.requires_grad assert model.layer2.weight.requires_grad From e751458bcb9163a6a3d9413de004cc991bda3c12 Mon Sep 17 00:00:00 2001 From: "Ibrahim Hadzic (aider)" Date: Fri, 29 Nov 2024 11:50:20 -0500 Subject: [PATCH 08/51] test: Include dummy Trainer in freezer and system tests for validation --- tests/unit/test_freezer.py | 17 ++++++++++++++--- tests/unit/test_system.py | 10 ++++++++-- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/tests/unit/test_freezer.py b/tests/unit/test_freezer.py index 9e75b473..b44c318f 100644 --- a/tests/unit/test_freezer.py +++ b/tests/unit/test_freezer.py @@ -1,7 +1,9 @@ import pytest from lighter.callbacks.freezer import LighterFreezer import torch -from torch.nn import Module +from pytorch_lightning import Trainer +from pytorch_lightning.callbacks import Callback +from torch.utils.data import DataLoader, Dataset class DummyModel(Module): def __init__(self): @@ -13,9 +15,18 @@ def test_freezer_initialization(): freezer = LighterFreezer(names=["layer1"]) assert freezer.names == ["layer1"] -def test_freezer_freezing(): +class DummyDataset(Dataset): + def __len__(self): + return 10 + + def __getitem__(self, idx): + return torch.randn(10), torch.tensor(0) + +def test_freezer_with_trainer(): model = DummyModel() freezer = LighterFreezer(names=["layer1"]) - freezer._set_model_requires_grad(model, requires_grad=False) + trainer = Trainer(callbacks=[freezer], max_epochs=1) + trainer.fit_loop.setup_data(DataLoader(DummyDataset())) + freezer.on_train_batch_start(trainer, None, None, 0) assert not model.layer1.weight.requires_grad assert model.layer2.weight.requires_grad diff --git a/tests/unit/test_system.py b/tests/unit/test_system.py index fb4093d5..ad76adc9 100644 --- a/tests/unit/test_system.py +++ b/tests/unit/test_system.py @@ -1,7 +1,7 @@ import pytest import torch import torch.nn as nn -from torch.utils.data import Dataset, DataLoader +from pytorch_lightning import Trainer from torch.optim import Adam from torch.optim.lr_scheduler import StepLR from torchmetrics import Accuracy @@ -61,7 +61,13 @@ def basic_system(): metrics=metrics ) -def test_system_initialization(basic_system): +def test_system_with_trainer(basic_system): + trainer = Trainer(max_epochs=1) + trainer.fit_loop.setup_data(basic_system.train_dataloader()) + trainer.fit_loop.setup_data(basic_system.val_dataloader()) + trainer.fit_loop.setup_data(basic_system.test_dataloader()) + trainer.fit_loop.setup_data(basic_system.predict_dataloader()) + assert isinstance(basic_system.model, DummyModel) assert isinstance(basic_system.model, DummyModel) assert basic_system.batch_size == 32 assert isinstance(basic_system.optimizer, Adam) From 895876490e9c101df77a11f0ce95bf377a505c46 Mon Sep 17 00:00:00 2001 From: "Ibrahim Hadzic (aider)" Date: Fri, 29 Nov 2024 11:50:49 -0500 Subject: [PATCH 09/51] fix: Import missing classes in test_freezer and test_system files --- tests/unit/test_freezer.py | 2 +- tests/unit/test_system.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/test_freezer.py b/tests/unit/test_freezer.py index b44c318f..62ec83ef 100644 --- a/tests/unit/test_freezer.py +++ b/tests/unit/test_freezer.py @@ -3,7 +3,7 @@ import torch from pytorch_lightning import Trainer from pytorch_lightning.callbacks import Callback -from torch.utils.data import DataLoader, Dataset +from torch.nn import Module class DummyModel(Module): def __init__(self): diff --git a/tests/unit/test_system.py b/tests/unit/test_system.py index ad76adc9..0ca252af 100644 --- a/tests/unit/test_system.py +++ b/tests/unit/test_system.py @@ -4,7 +4,7 @@ from pytorch_lightning import Trainer from torch.optim import Adam from torch.optim.lr_scheduler import StepLR -from torchmetrics import Accuracy +from torch.utils.data import Dataset, DataLoader from lighter.system import LighterSystem From 85ba211b0d80178de34f59797c1c20f57eeebd94 Mon Sep 17 00:00:00 2001 From: "Ibrahim Hadzic (aider)" Date: Fri, 29 Nov 2024 11:51:37 -0500 Subject: [PATCH 10/51] fix: Import missing Dataset, DataLoader, and Accuracy in test files --- tests/unit/test_freezer.py | 2 +- tests/unit/test_system.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/test_freezer.py b/tests/unit/test_freezer.py index 62ec83ef..a91a35aa 100644 --- a/tests/unit/test_freezer.py +++ b/tests/unit/test_freezer.py @@ -3,7 +3,7 @@ import torch from pytorch_lightning import Trainer from pytorch_lightning.callbacks import Callback -from torch.nn import Module +from torch.utils.data import Dataset, DataLoader class DummyModel(Module): def __init__(self): diff --git a/tests/unit/test_system.py b/tests/unit/test_system.py index 0ca252af..ad76adc9 100644 --- a/tests/unit/test_system.py +++ b/tests/unit/test_system.py @@ -4,7 +4,7 @@ from pytorch_lightning import Trainer from torch.optim import Adam from torch.optim.lr_scheduler import StepLR -from torch.utils.data import Dataset, DataLoader +from torchmetrics import Accuracy from lighter.system import LighterSystem From ce266685702e16198363db039253de82ab5620e1 Mon Sep 17 00:00:00 2001 From: Ibrahim Hadzic Date: Fri, 29 Nov 2024 11:54:55 -0500 Subject: [PATCH 11/51] chore: Update imports in test files for consistency and clarity --- tests/unit/test_freezer.py | 4 ++-- tests/unit/test_system.py | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/unit/test_freezer.py b/tests/unit/test_freezer.py index a91a35aa..557513a2 100644 --- a/tests/unit/test_freezer.py +++ b/tests/unit/test_freezer.py @@ -1,9 +1,9 @@ import pytest from lighter.callbacks.freezer import LighterFreezer import torch -from pytorch_lightning import Trainer -from pytorch_lightning.callbacks import Callback +from torch.nn import Module from torch.utils.data import Dataset, DataLoader +from pytorch_lightning import Trainer class DummyModel(Module): def __init__(self): diff --git a/tests/unit/test_system.py b/tests/unit/test_system.py index ad76adc9..b331aece 100644 --- a/tests/unit/test_system.py +++ b/tests/unit/test_system.py @@ -1,6 +1,7 @@ import pytest import torch import torch.nn as nn +from torch.utils.data import Dataset from pytorch_lightning import Trainer from torch.optim import Adam from torch.optim.lr_scheduler import StepLR From e236d474117e5cc2702fc41573bc0a7528e19cc9 Mon Sep 17 00:00:00 2001 From: "Ibrahim Hadzic (aider)" Date: Fri, 29 Nov 2024 11:54:56 -0500 Subject: [PATCH 12/51] fix: Resolve TypeError in trainer setup and import DataLoader correctly --- tests/unit/test_freezer.py | 2 +- tests/unit/test_system.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/unit/test_freezer.py b/tests/unit/test_freezer.py index 557513a2..44d5683d 100644 --- a/tests/unit/test_freezer.py +++ b/tests/unit/test_freezer.py @@ -26,7 +26,7 @@ def test_freezer_with_trainer(): model = DummyModel() freezer = LighterFreezer(names=["layer1"]) trainer = Trainer(callbacks=[freezer], max_epochs=1) - trainer.fit_loop.setup_data(DataLoader(DummyDataset())) + trainer.fit(DataLoader(DummyDataset()), model) freezer.on_train_batch_start(trainer, None, None, 0) assert not model.layer1.weight.requires_grad assert model.layer2.weight.requires_grad diff --git a/tests/unit/test_system.py b/tests/unit/test_system.py index b331aece..815af06a 100644 --- a/tests/unit/test_system.py +++ b/tests/unit/test_system.py @@ -1,7 +1,7 @@ import pytest import torch import torch.nn as nn -from torch.utils.data import Dataset +from torch.utils.data import Dataset, DataLoader from pytorch_lightning import Trainer from torch.optim import Adam from torch.optim.lr_scheduler import StepLR @@ -64,7 +64,7 @@ def basic_system(): def test_system_with_trainer(basic_system): trainer = Trainer(max_epochs=1) - trainer.fit_loop.setup_data(basic_system.train_dataloader()) + trainer.fit(basic_system) trainer.fit_loop.setup_data(basic_system.val_dataloader()) trainer.fit_loop.setup_data(basic_system.test_dataloader()) trainer.fit_loop.setup_data(basic_system.predict_dataloader()) From 2abb1594cf4bdd0fa969b58fbf3b618de57452aa Mon Sep 17 00:00:00 2001 From: "Ibrahim Hadzic (aider)" Date: Fri, 29 Nov 2024 11:57:19 -0500 Subject: [PATCH 13/51] feat: Wrap dummy models into a minimal PyTorch Lightning system --- tests/unit/test_freezer.py | 11 +++++--- tests/unit/test_model.py | 5 ++-- tests/unit/test_system.py | 58 ++++++++++++++++++++------------------ 3 files changed, 41 insertions(+), 33 deletions(-) diff --git a/tests/unit/test_freezer.py b/tests/unit/test_freezer.py index 44d5683d..1e9e1ed4 100644 --- a/tests/unit/test_freezer.py +++ b/tests/unit/test_freezer.py @@ -1,7 +1,7 @@ import pytest from lighter.callbacks.freezer import LighterFreezer import torch -from torch.nn import Module +from lighter.system import LighterSystem from torch.utils.data import Dataset, DataLoader from pytorch_lightning import Trainer @@ -22,11 +22,14 @@ def __len__(self): def __getitem__(self, idx): return torch.randn(10), torch.tensor(0) -def test_freezer_with_trainer(): - model = DummyModel() +class DummySystem(LighterSystem): + def __init__(self): + model = DummyModel() + super().__init__(model=model, batch_size=32) + system = DummySystem() freezer = LighterFreezer(names=["layer1"]) trainer = Trainer(callbacks=[freezer], max_epochs=1) - trainer.fit(DataLoader(DummyDataset()), model) + trainer.fit(system) freezer.on_train_batch_start(trainer, None, None, 0) assert not model.layer1.weight.requires_grad assert model.layer2.weight.requires_grad diff --git a/tests/unit/test_model.py b/tests/unit/test_model.py index 1c36c47b..ebbe82c8 100644 --- a/tests/unit/test_model.py +++ b/tests/unit/test_model.py @@ -1,9 +1,10 @@ import pytest from lighter.utils.model import replace_layer_with_identity import torch -from torch.nn import Linear, Module +from torch.nn import Linear +from lighter.system import LighterSystem -class DummyModel(Module): +class DummySystem(LighterSystem): def __init__(self): super().__init__() self.fc = Linear(10, 10) diff --git a/tests/unit/test_system.py b/tests/unit/test_system.py index 815af06a..554854a5 100644 --- a/tests/unit/test_system.py +++ b/tests/unit/test_system.py @@ -33,34 +33,38 @@ def __init__(self): def forward(self, x): return self.net(x) +class DummySystem(LighterSystem): + def __init__(self): + model = DummyModel() + optimizer = Adam(model.parameters(), lr=0.001) + scheduler = StepLR(optimizer, step_size=1) + criterion = nn.CrossEntropyLoss() + + datasets = { + "train": DummyDataset(), + "val": DummyDataset(50), + "test": DummyDataset(20) + } + + metrics = { + "train": Accuracy(task="multiclass", num_classes=10), + "val": Accuracy(task="multiclass", num_classes=10), + "test": Accuracy(task="multiclass", num_classes=10) + } + + super().__init__( + model=model, + batch_size=32, + optimizer=optimizer, + scheduler=scheduler, + criterion=criterion, + datasets=datasets, + metrics=metrics + ) + @pytest.fixture def basic_system(): - model = DummyModel() - optimizer = Adam(model.parameters(), lr=0.001) - scheduler = StepLR(optimizer, step_size=1) - criterion = nn.CrossEntropyLoss() - - datasets = { - "train": DummyDataset(), - "val": DummyDataset(50), - "test": DummyDataset(20) - } - - metrics = { - "train": Accuracy(task="multiclass", num_classes=10), - "val": Accuracy(task="multiclass", num_classes=10), - "test": Accuracy(task="multiclass", num_classes=10) - } - - return LighterSystem( - model=model, - batch_size=32, - optimizer=optimizer, - scheduler=scheduler, - criterion=criterion, - datasets=datasets, - metrics=metrics - ) + return DummySystem() def test_system_with_trainer(basic_system): trainer = Trainer(max_epochs=1) @@ -146,7 +150,7 @@ def test_invalid_batch_format(basic_system): basic_system._base_step(invalid_batch, batch_idx=0, mode="train") import pytest from lighter.system import LighterSystem -from torch.nn import Module +from lighter.system import LighterSystem class DummyModel(nn.Module): def __init__(self): From 214e7b1d11414404868703bda76b549773d175bb Mon Sep 17 00:00:00 2001 From: "Ibrahim Hadzic (aider)" Date: Fri, 29 Nov 2024 11:57:47 -0500 Subject: [PATCH 14/51] fix: Correct test case for replacing layer with identity in unit tests --- tests/unit/test_model.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/unit/test_model.py b/tests/unit/test_model.py index ebbe82c8..2ca2f05c 100644 --- a/tests/unit/test_model.py +++ b/tests/unit/test_model.py @@ -10,6 +10,6 @@ def __init__(self): self.fc = Linear(10, 10) def test_replace_layer_with_identity(): - model = DummyModel() - replace_layer_with_identity(model, "fc") - assert isinstance(model.fc, torch.nn.Identity) + system = DummySystem() + replace_layer_with_identity(system.model, "fc") + assert isinstance(system.model.fc, torch.nn.Identity) From c9a89fc8169957e667bdcfd70d51291540ce6e16 Mon Sep 17 00:00:00 2001 From: Ibrahim Hadzic Date: Fri, 29 Nov 2024 12:01:41 -0500 Subject: [PATCH 15/51] chore: Import Module from torch.nn in test_freezer.py --- tests/unit/test_freezer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/unit/test_freezer.py b/tests/unit/test_freezer.py index 1e9e1ed4..8c71cad6 100644 --- a/tests/unit/test_freezer.py +++ b/tests/unit/test_freezer.py @@ -1,6 +1,7 @@ import pytest from lighter.callbacks.freezer import LighterFreezer import torch +from torch.nn import Module from lighter.system import LighterSystem from torch.utils.data import Dataset, DataLoader from pytorch_lightning import Trainer From 4fafbe934ce8b6f69e1d8f22c50656dc9d2d5d7c Mon Sep 17 00:00:00 2001 From: "Ibrahim Hadzic (aider)" Date: Fri, 29 Nov 2024 12:01:42 -0500 Subject: [PATCH 16/51] test: Fix NameError and improve tests for LighterFreezer functionality --- tests/unit/test_freezer.py | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/tests/unit/test_freezer.py b/tests/unit/test_freezer.py index 8c71cad6..3b7276e1 100644 --- a/tests/unit/test_freezer.py +++ b/tests/unit/test_freezer.py @@ -12,10 +12,6 @@ def __init__(self): self.layer1 = torch.nn.Linear(10, 10) self.layer2 = torch.nn.Linear(10, 10) -def test_freezer_initialization(): - freezer = LighterFreezer(names=["layer1"]) - assert freezer.names == ["layer1"] - class DummyDataset(Dataset): def __len__(self): return 10 @@ -26,11 +22,28 @@ def __getitem__(self, idx): class DummySystem(LighterSystem): def __init__(self): model = DummyModel() - super().__init__(model=model, batch_size=32) - system = DummySystem() + optimizer = torch.optim.SGD(model.parameters(), lr=0.01) + super().__init__(model=model, batch_size=32, optimizer=optimizer, datasets={"train": DummyDataset()}) + +@pytest.fixture +def dummy_system(): + return DummySystem() + +def test_freezer_initialization(): + freezer = LighterFreezer(names=["layer1"]) + assert freezer.names == ["layer1"] + +def test_freezer_functionality(dummy_system): freezer = LighterFreezer(names=["layer1"]) trainer = Trainer(callbacks=[freezer], max_epochs=1) - trainer.fit(system) - freezer.on_train_batch_start(trainer, None, None, 0) - assert not model.layer1.weight.requires_grad - assert model.layer2.weight.requires_grad + trainer.fit(dummy_system) + assert not dummy_system.model.layer1.weight.requires_grad + assert dummy_system.model.layer2.weight.requires_grad + +def test_freezer_with_exceptions(dummy_system): + freezer = LighterFreezer(names=["layer1"], except_names=["layer1.weight"]) + trainer = Trainer(callbacks=[freezer], max_epochs=1) + trainer.fit(dummy_system) + assert dummy_system.model.layer1.weight.requires_grad + assert not dummy_system.model.layer1.bias.requires_grad + assert dummy_system.model.layer2.weight.requires_grad From 431a18f5336f1de32d928dfa2b8ca0b5da322bbe Mon Sep 17 00:00:00 2001 From: Ibrahim Hadzic Date: Fri, 29 Nov 2024 12:11:37 -0500 Subject: [PATCH 17/51] refactor: Simplify dataset return structure in test_freezer.py --- tests/unit/test_freezer.py | 4 ++-- tests/unit/test_model.py | 15 --------------- 2 files changed, 2 insertions(+), 17 deletions(-) diff --git a/tests/unit/test_freezer.py b/tests/unit/test_freezer.py index 3b7276e1..b117b5aa 100644 --- a/tests/unit/test_freezer.py +++ b/tests/unit/test_freezer.py @@ -3,7 +3,7 @@ import torch from torch.nn import Module from lighter.system import LighterSystem -from torch.utils.data import Dataset, DataLoader +from torch.utils.data import Dataset from pytorch_lightning import Trainer class DummyModel(Module): @@ -17,7 +17,7 @@ def __len__(self): return 10 def __getitem__(self, idx): - return torch.randn(10), torch.tensor(0) + return {"input": torch.randn(10), "target": torch.tensor(0)} class DummySystem(LighterSystem): def __init__(self): diff --git a/tests/unit/test_model.py b/tests/unit/test_model.py index 2ca2f05c..e69de29b 100644 --- a/tests/unit/test_model.py +++ b/tests/unit/test_model.py @@ -1,15 +0,0 @@ -import pytest -from lighter.utils.model import replace_layer_with_identity -import torch -from torch.nn import Linear -from lighter.system import LighterSystem - -class DummySystem(LighterSystem): - def __init__(self): - super().__init__() - self.fc = Linear(10, 10) - -def test_replace_layer_with_identity(): - system = DummySystem() - replace_layer_with_identity(system.model, "fc") - assert isinstance(system.model.fc, torch.nn.Identity) From 5097be76ba2639ef0f67c4039efedc3609b59aea Mon Sep 17 00:00:00 2001 From: "Ibrahim Hadzic (aider)" Date: Fri, 29 Nov 2024 12:11:38 -0500 Subject: [PATCH 18/51] refactor: Replace DummySystem with LighterSystem and add model tests --- tests/unit/test_freezer.py | 11 ++++------- tests/unit/test_model.py | 29 +++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/tests/unit/test_freezer.py b/tests/unit/test_freezer.py index b117b5aa..2680cde5 100644 --- a/tests/unit/test_freezer.py +++ b/tests/unit/test_freezer.py @@ -19,15 +19,12 @@ def __len__(self): def __getitem__(self, idx): return {"input": torch.randn(10), "target": torch.tensor(0)} -class DummySystem(LighterSystem): - def __init__(self): - model = DummyModel() - optimizer = torch.optim.SGD(model.parameters(), lr=0.01) - super().__init__(model=model, batch_size=32, optimizer=optimizer, datasets={"train": DummyDataset()}) - @pytest.fixture def dummy_system(): - return DummySystem() + model = DummyModel() + optimizer = torch.optim.SGD(model.parameters(), lr=0.01) + dataset = DummyDataset() + return LighterSystem(model=model, batch_size=32, optimizer=optimizer, datasets={"train": dataset}) def test_freezer_initialization(): freezer = LighterFreezer(names=["layer1"]) diff --git a/tests/unit/test_model.py b/tests/unit/test_model.py index e69de29b..19167047 100644 --- a/tests/unit/test_model.py +++ b/tests/unit/test_model.py @@ -0,0 +1,29 @@ +import pytest +import torch +from torch.nn import Linear, Sequential +from lighter.utils.model import replace_layer_with, replace_layer_with_identity, remove_n_last_layers_sequentially + +class SimpleModel(torch.nn.Module): + def __init__(self): + super().__init__() + self.layer1 = Linear(10, 10) + self.layer2 = Linear(10, 10) + + def forward(self, x): + return self.layer2(self.layer1(x)) + +def test_replace_layer_with(): + model = SimpleModel() + new_layer = Linear(10, 10) + replace_layer_with(model, "layer1", new_layer) + assert model.layer1 == new_layer + +def test_replace_layer_with_identity(): + model = SimpleModel() + replace_layer_with_identity(model, "layer1") + assert isinstance(model.layer1, torch.nn.Identity) + +def test_remove_n_last_layers_sequentially(): + model = Sequential(Linear(10, 10), Linear(10, 10), Linear(10, 10)) + new_model = remove_n_last_layers_sequentially(model, num_layers=1) + assert len(new_model) == 2 From 5f53545404ded1d4ce9464622284b7711b5e9659 Mon Sep 17 00:00:00 2001 From: Ibrahim Hadzic Date: Fri, 29 Nov 2024 12:27:54 -0500 Subject: [PATCH 19/51] feat: Update DummyModel architecture and enhance dummy_system setup --- tests/unit/test_freezer.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/tests/unit/test_freezer.py b/tests/unit/test_freezer.py index 2680cde5..4e4142db 100644 --- a/tests/unit/test_freezer.py +++ b/tests/unit/test_freezer.py @@ -10,7 +10,14 @@ class DummyModel(Module): def __init__(self): super().__init__() self.layer1 = torch.nn.Linear(10, 10) - self.layer2 = torch.nn.Linear(10, 10) + self.layer2 = torch.nn.Linear(10, 4) + self.layer3 = torch.nn.Linear(4, 1) + + def forward(self, x): + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + return x class DummyDataset(Dataset): def __len__(self): @@ -24,7 +31,8 @@ def dummy_system(): model = DummyModel() optimizer = torch.optim.SGD(model.parameters(), lr=0.01) dataset = DummyDataset() - return LighterSystem(model=model, batch_size=32, optimizer=optimizer, datasets={"train": dataset}) + criterion = torch.nn.CrossEntropyLoss() + return LighterSystem(model=model, batch_size=32, criterion=criterion, optimizer=optimizer, datasets={"train": dataset}) def test_freezer_initialization(): freezer = LighterFreezer(names=["layer1"]) @@ -38,9 +46,9 @@ def test_freezer_functionality(dummy_system): assert dummy_system.model.layer2.weight.requires_grad def test_freezer_with_exceptions(dummy_system): - freezer = LighterFreezer(names=["layer1"], except_names=["layer1.weight"]) + freezer = LighterFreezer(name_starts_with=["layer"], except_names=["layer2"]) trainer = Trainer(callbacks=[freezer], max_epochs=1) trainer.fit(dummy_system) - assert dummy_system.model.layer1.weight.requires_grad - assert not dummy_system.model.layer1.bias.requires_grad + assert not dummy_system.model.layer1.weight.requires_grad assert dummy_system.model.layer2.weight.requires_grad + assert not dummy_system.model.layer3.weight.requires_grad From dc50ff49b1bca4e1975ac2d9c9cb1ae0a526c6e6 Mon Sep 17 00:00:00 2001 From: "Ibrahim Hadzic (aider)" Date: Fri, 29 Nov 2024 12:27:55 -0500 Subject: [PATCH 20/51] test: Ensure optimizer is correctly set up in freezer exception test --- tests/unit/test_freezer.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/unit/test_freezer.py b/tests/unit/test_freezer.py index 4e4142db..a4b4bf03 100644 --- a/tests/unit/test_freezer.py +++ b/tests/unit/test_freezer.py @@ -52,3 +52,9 @@ def test_freezer_with_exceptions(dummy_system): assert not dummy_system.model.layer1.weight.requires_grad assert dummy_system.model.layer2.weight.requires_grad assert not dummy_system.model.layer3.weight.requires_grad + + # Ensure that the optimizer is set up correctly + for param_group in dummy_system.optimizer.param_groups: + for param in param_group['params']: + if param.requires_grad: + assert param.grad is not None From 1f2bd410d7d4f8c1b6f329ba487e2cb71f5199f6 Mon Sep 17 00:00:00 2001 From: "Ibrahim Hadzic (aider)" Date: Fri, 29 Nov 2024 12:28:34 -0500 Subject: [PATCH 21/51] test: Update assertion to check for grad_fn in freezer test case --- tests/unit/test_freezer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_freezer.py b/tests/unit/test_freezer.py index a4b4bf03..22b52f95 100644 --- a/tests/unit/test_freezer.py +++ b/tests/unit/test_freezer.py @@ -57,4 +57,4 @@ def test_freezer_with_exceptions(dummy_system): for param_group in dummy_system.optimizer.param_groups: for param in param_group['params']: if param.requires_grad: - assert param.grad is not None + assert param.grad_fn is not None From 0da59a793d420f9a676acad65106ac32838517cb Mon Sep 17 00:00:00 2001 From: Ibrahim Hadzic Date: Fri, 29 Nov 2024 12:34:48 -0500 Subject: [PATCH 22/51] test: Remove redundant optimizer setup assertions from test_freezer.py --- tests/unit/test_freezer.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tests/unit/test_freezer.py b/tests/unit/test_freezer.py index 22b52f95..4e4142db 100644 --- a/tests/unit/test_freezer.py +++ b/tests/unit/test_freezer.py @@ -52,9 +52,3 @@ def test_freezer_with_exceptions(dummy_system): assert not dummy_system.model.layer1.weight.requires_grad assert dummy_system.model.layer2.weight.requires_grad assert not dummy_system.model.layer3.weight.requires_grad - - # Ensure that the optimizer is set up correctly - for param_group in dummy_system.optimizer.param_groups: - for param in param_group['params']: - if param.requires_grad: - assert param.grad_fn is not None From 469af2bc834bcab26ca75b25e6e917547d23da29 Mon Sep 17 00:00:00 2001 From: "Ibrahim Hadzic (aider)" Date: Fri, 29 Nov 2024 12:34:48 -0500 Subject: [PATCH 23/51] fix: Correct freezing logic in freezer tests to match expected behavior --- tests/unit/test_freezer.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/unit/test_freezer.py b/tests/unit/test_freezer.py index 4e4142db..488ecf59 100644 --- a/tests/unit/test_freezer.py +++ b/tests/unit/test_freezer.py @@ -39,16 +39,20 @@ def test_freezer_initialization(): assert freezer.names == ["layer1"] def test_freezer_functionality(dummy_system): - freezer = LighterFreezer(names=["layer1"]) + freezer = LighterFreezer(names=["layer1.weight", "layer1.bias"]) trainer = Trainer(callbacks=[freezer], max_epochs=1) trainer.fit(dummy_system) assert not dummy_system.model.layer1.weight.requires_grad + assert not dummy_system.model.layer1.bias.requires_grad assert dummy_system.model.layer2.weight.requires_grad def test_freezer_with_exceptions(dummy_system): - freezer = LighterFreezer(name_starts_with=["layer"], except_names=["layer2"]) + freezer = LighterFreezer(name_starts_with=["layer"], except_names=["layer2.weight", "layer2.bias"]) trainer = Trainer(callbacks=[freezer], max_epochs=1) trainer.fit(dummy_system) assert not dummy_system.model.layer1.weight.requires_grad + assert not dummy_system.model.layer1.bias.requires_grad assert dummy_system.model.layer2.weight.requires_grad + assert dummy_system.model.layer2.bias.requires_grad assert not dummy_system.model.layer3.weight.requires_grad + assert not dummy_system.model.layer3.bias.requires_grad From af6363e9a9e54d44ea49bb5173cce3d91b2497ae Mon Sep 17 00:00:00 2001 From: Ibrahim Hadzic Date: Fri, 29 Nov 2024 13:16:14 -0500 Subject: [PATCH 24/51] Fixes, reorganize --- .gitignore | 2 + ...t_freezer.py => test_callbacks_freezer.py} | 13 +- tests/unit/test_callbacks_utils.py | 19 +++ ..._base.py => test_callbacks_writer_base.py} | 2 + ..._file.py => test_callbacks_writer_file.py} | 7 +- ...able.py => test_callbacks_writer_table.py} | 7 +- tests/unit/test_system.py | 147 ++++++++---------- tests/unit/test_utils.py | 13 -- ...{test_collate.py => test_utils_collate.py} | 2 +- ...ports.py => test_utils_dynamic_imports.py} | 2 + ...{test_logging.py => test_utils_logging.py} | 2 +- .../unit/{test_misc.py => test_utils_misc.py} | 2 +- .../{test_model.py => test_utils_model.py} | 8 +- ...{test_patches.py => test_utils_patches.py} | 5 +- .../{test_runner.py => test_utils_runner.py} | 2 + .../{test_schema.py => test_utils_schema.py} | 0 16 files changed, 127 insertions(+), 106 deletions(-) rename tests/unit/{test_freezer.py => test_callbacks_freezer.py} (99%) create mode 100644 tests/unit/test_callbacks_utils.py rename tests/unit/{test_writer_base.py => test_callbacks_writer_base.py} (99%) rename tests/unit/{test_writer_file.py => test_callbacks_writer_file.py} (97%) rename tests/unit/{test_writer_table.py => test_callbacks_writer_table.py} (96%) delete mode 100644 tests/unit/test_utils.py rename tests/unit/{test_collate.py => test_utils_collate.py} (94%) rename tests/unit/{test_dynamic_imports.py => test_utils_dynamic_imports.py} (99%) rename tests/unit/{test_logging.py => test_utils_logging.py} (91%) rename tests/unit/{test_misc.py => test_utils_misc.py} (91%) rename tests/unit/{test_model.py => test_utils_model.py} (86%) rename tests/unit/{test_patches.py => test_utils_patches.py} (94%) rename tests/unit/{test_runner.py => test_utils_runner.py} (98%) rename tests/unit/{test_schema.py => test_utils_schema.py} (100%) diff --git a/.gitignore b/.gitignore index 279ebdef..a9550a7a 100644 --- a/.gitignore +++ b/.gitignore @@ -152,3 +152,5 @@ projects/* **/predictions/ */.DS_Store .DS_Store +.aider* +test_dir/ diff --git a/tests/unit/test_freezer.py b/tests/unit/test_callbacks_freezer.py similarity index 99% rename from tests/unit/test_freezer.py rename to tests/unit/test_callbacks_freezer.py index 488ecf59..dfaf45c7 100644 --- a/tests/unit/test_freezer.py +++ b/tests/unit/test_callbacks_freezer.py @@ -1,10 +1,12 @@ import pytest -from lighter.callbacks.freezer import LighterFreezer import torch +from pytorch_lightning import Trainer from torch.nn import Module -from lighter.system import LighterSystem from torch.utils.data import Dataset -from pytorch_lightning import Trainer + +from lighter.callbacks.freezer import LighterFreezer +from lighter.system import LighterSystem + class DummyModel(Module): def __init__(self): @@ -19,6 +21,7 @@ def forward(self, x): x = self.layer3(x) return x + class DummyDataset(Dataset): def __len__(self): return 10 @@ -26,6 +29,7 @@ def __len__(self): def __getitem__(self, idx): return {"input": torch.randn(10), "target": torch.tensor(0)} + @pytest.fixture def dummy_system(): model = DummyModel() @@ -34,10 +38,12 @@ def dummy_system(): criterion = torch.nn.CrossEntropyLoss() return LighterSystem(model=model, batch_size=32, criterion=criterion, optimizer=optimizer, datasets={"train": dataset}) + def test_freezer_initialization(): freezer = LighterFreezer(names=["layer1"]) assert freezer.names == ["layer1"] + def test_freezer_functionality(dummy_system): freezer = LighterFreezer(names=["layer1.weight", "layer1.bias"]) trainer = Trainer(callbacks=[freezer], max_epochs=1) @@ -46,6 +52,7 @@ def test_freezer_functionality(dummy_system): assert not dummy_system.model.layer1.bias.requires_grad assert dummy_system.model.layer2.weight.requires_grad + def test_freezer_with_exceptions(dummy_system): freezer = LighterFreezer(name_starts_with=["layer"], except_names=["layer2.weight", "layer2.bias"]) trainer = Trainer(callbacks=[freezer], max_epochs=1) diff --git a/tests/unit/test_callbacks_utils.py b/tests/unit/test_callbacks_utils.py new file mode 100644 index 00000000..b515a7a1 --- /dev/null +++ b/tests/unit/test_callbacks_utils.py @@ -0,0 +1,19 @@ +import torch + +from lighter.callbacks.utils import preprocess_image + + +def test_preprocess_image_2d(): + image = torch.rand(1, 3, 64, 64) # Batch of 2D images + processed_image = preprocess_image(image) + assert processed_image.shape == (3, 64, 64) + + +def test_preprocess_image_3d(): + batch_size = 8 + depth = 20 + height = 64 + width = 64 + image = torch.rand(batch_size, 1, depth, height, width) # Batch of 3D images + processed_image = preprocess_image(image) + assert processed_image.shape == (1, depth * height, batch_size * width) diff --git a/tests/unit/test_writer_base.py b/tests/unit/test_callbacks_writer_base.py similarity index 99% rename from tests/unit/test_writer_base.py rename to tests/unit/test_callbacks_writer_base.py index 62814a1d..80ef380f 100644 --- a/tests/unit/test_writer_base.py +++ b/tests/unit/test_callbacks_writer_base.py @@ -1,6 +1,8 @@ import pytest + from lighter.callbacks.writer.base import LighterBaseWriter + def test_writer_initialization(): with pytest.raises(TypeError): writer = LighterBaseWriter(path="test", writer="tensor") diff --git a/tests/unit/test_writer_file.py b/tests/unit/test_callbacks_writer_file.py similarity index 97% rename from tests/unit/test_writer_file.py rename to tests/unit/test_callbacks_writer_file.py index aaa3683c..f120ba68 100644 --- a/tests/unit/test_writer_file.py +++ b/tests/unit/test_callbacks_writer_file.py @@ -1,14 +1,17 @@ -import pytest -import torch from pathlib import Path + +import torch + from lighter.callbacks.writer.file import LighterFileWriter + def test_file_writer_initialization(): path = Path("test_dir") path.mkdir(exist_ok=True) # Ensure the directory exists writer = LighterFileWriter(path=path, writer="tensor") assert writer.path == Path("test_dir") + def test_file_writer_write_tensor(): writer = LighterFileWriter(path="test_dir", writer="tensor") tensor = torch.tensor([1, 2, 3]) diff --git a/tests/unit/test_writer_table.py b/tests/unit/test_callbacks_writer_table.py similarity index 96% rename from tests/unit/test_writer_table.py rename to tests/unit/test_callbacks_writer_table.py index 7338bd29..7409f654 100644 --- a/tests/unit/test_writer_table.py +++ b/tests/unit/test_callbacks_writer_table.py @@ -1,12 +1,15 @@ -import pytest -import torch from pathlib import Path + +import torch + from lighter.callbacks.writer.table import LighterTableWriter + def test_table_writer_initialization(): writer = LighterTableWriter(path="test.csv", writer="tensor") assert writer.path == Path("test.csv") + def test_table_writer_write(): writer = LighterTableWriter(path="test.csv", writer="tensor") writer.write(tensor=torch.tensor([1]), id=1) diff --git a/tests/unit/test_system.py b/tests/unit/test_system.py index 554854a5..421b2d04 100644 --- a/tests/unit/test_system.py +++ b/tests/unit/test_system.py @@ -1,57 +1,53 @@ import pytest import torch import torch.nn as nn -from torch.utils.data import Dataset, DataLoader from pytorch_lightning import Trainer from torch.optim import Adam from torch.optim.lr_scheduler import StepLR +from torch.utils.data import DataLoader, Dataset from torchmetrics import Accuracy from lighter.system import LighterSystem + class DummyDataset(Dataset): def __init__(self, size=100): self.size = size - + def __len__(self): return self.size - + def __getitem__(self, idx): x = torch.randn(3, 32, 32) y = torch.randint(0, 10, size=()).long() # Changed to return scalar tensor return {"input": x, "target": y} + class DummyModel(nn.Module): def __init__(self): super().__init__() - self.net = nn.Sequential( - nn.Flatten(), - nn.Linear(3072, 10) - ) - + self.net = nn.Sequential(nn.Flatten(), nn.Linear(3072, 10)) + def forward(self, x): return self.net(x) + class DummySystem(LighterSystem): def __init__(self): model = DummyModel() optimizer = Adam(model.parameters(), lr=0.001) scheduler = StepLR(optimizer, step_size=1) criterion = nn.CrossEntropyLoss() - - datasets = { - "train": DummyDataset(), - "val": DummyDataset(50), - "test": DummyDataset(20) - } - + + datasets = {"train": DummyDataset(), "val": DummyDataset(50), "test": DummyDataset(20)} + metrics = { "train": Accuracy(task="multiclass", num_classes=10), "val": Accuracy(task="multiclass", num_classes=10), - "test": Accuracy(task="multiclass", num_classes=10) + "test": Accuracy(task="multiclass", num_classes=10), } - + super().__init__( model=model, batch_size=32, @@ -59,44 +55,45 @@ def __init__(self): scheduler=scheduler, criterion=criterion, datasets=datasets, - metrics=metrics + metrics=metrics, ) + @pytest.fixture -def basic_system(): +def dummy_system(): return DummySystem() -def test_system_with_trainer(basic_system): + +def test_system_with_trainer(dummy_system): trainer = Trainer(max_epochs=1) - trainer.fit(basic_system) - trainer.fit_loop.setup_data(basic_system.val_dataloader()) - trainer.fit_loop.setup_data(basic_system.test_dataloader()) - trainer.fit_loop.setup_data(basic_system.predict_dataloader()) - assert isinstance(basic_system.model, DummyModel) - assert isinstance(basic_system.model, DummyModel) - assert basic_system.batch_size == 32 - assert isinstance(basic_system.optimizer, Adam) - assert isinstance(basic_system.scheduler, StepLR) - -def test_configure_optimizers(basic_system): - config = basic_system.configure_optimizers() + trainer.fit(dummy_system) + assert dummy_system.batch_size == 32 + assert isinstance(dummy_system.model, DummyModel) + assert isinstance(dummy_system.optimizer, Adam) + assert isinstance(dummy_system.scheduler, StepLR) + + +def test_configure_optimizers(dummy_system): + config = dummy_system.configure_optimizers() assert "optimizer" in config assert "lr_scheduler" in config assert isinstance(config["optimizer"], Adam) assert isinstance(config["lr_scheduler"], StepLR) -def test_dataloader_creation(basic_system): - basic_system.setup("fit") - train_loader = basic_system.train_dataloader() + +def test_dataloader_creation(dummy_system): + dummy_system.setup("fit") + train_loader = dummy_system.train_dataloader() assert isinstance(train_loader, DataLoader) assert train_loader.batch_size == 32 + @pytest.mark.skip(reason="Requires trainer attachment") -def test_training_step(basic_system): - basic_system.setup("fit") - batch = next(iter(basic_system.train_dataloader())) - result = basic_system._base_step(batch, batch_idx=0, mode="train") - +def test_training_step(dummy_system): + dummy_system.setup("fit") + batch = next(iter(dummy_system.train_dataloader())) + result = dummy_system._base_step(batch, batch_idx=0, mode="train") + assert "loss" in result assert "metrics" in result assert "input" in result @@ -104,60 +101,52 @@ def test_training_step(basic_system): assert "pred" in result assert torch.is_tensor(result["loss"]) + @pytest.mark.skip(reason="Requires trainer attachment") -def test_validation_step(basic_system): - basic_system.setup("validate") - batch = next(iter(basic_system.val_dataloader())) - result = basic_system._base_step(batch, batch_idx=0, mode="val") - +def test_validation_step(dummy_system): + dummy_system.setup("validate") + batch = next(iter(dummy_system.val_dataloader())) + result = dummy_system._base_step(batch, batch_idx=0, mode="val") + assert "loss" in result assert "metrics" in result assert torch.is_tensor(result["loss"]) -def test_predict_step(basic_system): - basic_system.setup("predict") + +def test_predict_step(dummy_system): + dummy_system.setup("predict") batch = {"input": torch.randn(1, 3, 32, 32)} - result = basic_system._base_step(batch, batch_idx=0, mode="predict") - + result = dummy_system._base_step(batch, batch_idx=0, mode="predict") + assert "pred" in result assert torch.is_tensor(result["pred"]) -def test_learning_rate_property(basic_system): - initial_lr = basic_system.learning_rate - assert initial_lr == 0.001 - - basic_system.learning_rate = 0.01 - assert basic_system.learning_rate == 0.01 -@pytest.mark.parametrize("batch", [ +def test_learning_rate_property(dummy_system): + initial_lr = dummy_system.learning_rate + assert initial_lr == 0.001 + dummy_system.learning_rate = 0.01 + assert dummy_system.learning_rate == 0.01 - {"input": torch.randn(1, 3, 32, 32), "target": torch.randint(0, 10, size=()).long()}, - {"input": torch.randn(1, 3, 32, 32), "target": torch.randint(0, 10, size=()).long()}, - {"input": torch.randn(1, 3, 32, 32), "target": torch.randint(0, 10, size=()).long(), "id": "test_id"}, -]) +@pytest.mark.parametrize( + "batch", + [ + {"input": torch.randn(1, 3, 32, 32), "target": torch.randint(0, 10, size=()).long()}, + {"input": torch.randn(1, 3, 32, 32), "target": torch.randint(0, 10, size=()).long()}, + {"input": torch.randn(1, 3, 32, 32), "target": torch.randint(0, 10, size=()).long(), "id": "test_id"}, + ], +) @pytest.mark.skip(reason="Requires trainer attachment") -def test_valid_batch_formats(basic_system, batch): - basic_system.setup("fit") - result = basic_system._base_step(batch, batch_idx=0, mode="train") +def test_valid_batch_formats(dummy_system, batch): + dummy_system.setup("fit") + result = dummy_system._base_step(batch, batch_idx=0, mode="train") assert isinstance(result, dict) + @pytest.mark.xfail(raises=ValueError) -def test_invalid_batch_format(basic_system): - basic_system.setup("fit") +def test_invalid_batch_format(dummy_system): + dummy_system.setup("fit") invalid_batch = {"wrong_key": torch.randn(1, 3, 32, 32)} - basic_system._base_step(invalid_batch, batch_idx=0, mode="train") -import pytest -from lighter.system import LighterSystem -from lighter.system import LighterSystem - -class DummyModel(nn.Module): - def __init__(self): - super().__init__() - self.layer = nn.Linear(10, 10) - -def test_system_initialization(): - model = DummyModel() - system = LighterSystem(model=model, batch_size=32) - assert system.batch_size == 32 + dummy_system._base_step(invalid_batch, batch_idx=0, mode="train") diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py deleted file mode 100644 index 4f90ff2b..00000000 --- a/tests/unit/test_utils.py +++ /dev/null @@ -1,13 +0,0 @@ -import pytest -import torch -from lighter.callbacks.utils import preprocess_image - -def test_preprocess_image_2d(): - image = torch.rand(1, 3, 64, 64) - processed_image = preprocess_image(image) - assert processed_image.shape == (3, 64, 64) - -def test_preprocess_image_3d(): - image = torch.rand(1, 3, 4, 64, 64) - processed_image = preprocess_image(image) - assert processed_image.shape[0] == 3 diff --git a/tests/unit/test_collate.py b/tests/unit/test_utils_collate.py similarity index 94% rename from tests/unit/test_collate.py rename to tests/unit/test_utils_collate.py index f53a958f..b402ce02 100644 --- a/tests/unit/test_collate.py +++ b/tests/unit/test_utils_collate.py @@ -1,6 +1,6 @@ -import pytest from lighter.utils.collate import collate_replace_corrupted + def test_collate_replace_corrupted(): batch = [1, None, 2, None, 3] dataset = [1, 2, 3, 4, 5] diff --git a/tests/unit/test_dynamic_imports.py b/tests/unit/test_utils_dynamic_imports.py similarity index 99% rename from tests/unit/test_dynamic_imports.py rename to tests/unit/test_utils_dynamic_imports.py index c4350215..9db03f77 100644 --- a/tests/unit/test_dynamic_imports.py +++ b/tests/unit/test_utils_dynamic_imports.py @@ -1,6 +1,8 @@ import pytest + from lighter.utils.dynamic_imports import import_module_from_path + def test_import_module_from_path(): with pytest.raises(FileNotFoundError): import_module_from_path("non_existent_module", "non_existent_path") diff --git a/tests/unit/test_logging.py b/tests/unit/test_utils_logging.py similarity index 91% rename from tests/unit/test_logging.py rename to tests/unit/test_utils_logging.py index 3345affa..01ca8530 100644 --- a/tests/unit/test_logging.py +++ b/tests/unit/test_utils_logging.py @@ -1,6 +1,6 @@ -import pytest from lighter.utils.logging import _setup_logging + def test_setup_logging(): _setup_logging() assert True # Just ensure no exceptions are raised diff --git a/tests/unit/test_misc.py b/tests/unit/test_utils_misc.py similarity index 91% rename from tests/unit/test_misc.py rename to tests/unit/test_utils_misc.py index 693a6d87..8a2b37d5 100644 --- a/tests/unit/test_misc.py +++ b/tests/unit/test_utils_misc.py @@ -1,6 +1,6 @@ -import pytest from lighter.utils.misc import ensure_list + def test_ensure_list(): assert ensure_list(1) == [1] assert ensure_list([1, 2]) == [1, 2] diff --git a/tests/unit/test_model.py b/tests/unit/test_utils_model.py similarity index 86% rename from tests/unit/test_model.py rename to tests/unit/test_utils_model.py index 19167047..bb435d25 100644 --- a/tests/unit/test_model.py +++ b/tests/unit/test_utils_model.py @@ -1,7 +1,8 @@ -import pytest import torch from torch.nn import Linear, Sequential -from lighter.utils.model import replace_layer_with, replace_layer_with_identity, remove_n_last_layers_sequentially + +from lighter.utils.model import remove_n_last_layers_sequentially, replace_layer_with, replace_layer_with_identity + class SimpleModel(torch.nn.Module): def __init__(self): @@ -12,17 +13,20 @@ def __init__(self): def forward(self, x): return self.layer2(self.layer1(x)) + def test_replace_layer_with(): model = SimpleModel() new_layer = Linear(10, 10) replace_layer_with(model, "layer1", new_layer) assert model.layer1 == new_layer + def test_replace_layer_with_identity(): model = SimpleModel() replace_layer_with_identity(model, "layer1") assert isinstance(model.layer1, torch.nn.Identity) + def test_remove_n_last_layers_sequentially(): model = Sequential(Linear(10, 10), Linear(10, 10), Linear(10, 10)) new_model = remove_n_last_layers_sequentially(model, num_layers=1) diff --git a/tests/unit/test_patches.py b/tests/unit/test_utils_patches.py similarity index 94% rename from tests/unit/test_patches.py rename to tests/unit/test_utils_patches.py index 07fc7fd8..4432e87f 100644 --- a/tests/unit/test_patches.py +++ b/tests/unit/test_utils_patches.py @@ -1,7 +1,8 @@ -import pytest -from lighter.utils.patches import PatchedModuleDict from torch.nn import Linear +from lighter.utils.patches import PatchedModuleDict + + def test_patched_module_dict(): modules = {"layer1": Linear(10, 10)} patched_dict = PatchedModuleDict(modules) diff --git a/tests/unit/test_runner.py b/tests/unit/test_utils_runner.py similarity index 98% rename from tests/unit/test_runner.py rename to tests/unit/test_utils_runner.py index 5cb09cc7..1fc6939a 100644 --- a/tests/unit/test_runner.py +++ b/tests/unit/test_utils_runner.py @@ -1,6 +1,8 @@ import pytest + from lighter.utils.runner import parse_config + def test_parse_config_no_config(): with pytest.raises(ValueError): parse_config() diff --git a/tests/unit/test_schema.py b/tests/unit/test_utils_schema.py similarity index 100% rename from tests/unit/test_schema.py rename to tests/unit/test_utils_schema.py From dd4cb7c6d7fb32bf0d606cd423b3a04ccbf57f66 Mon Sep 17 00:00:00 2001 From: Ibrahim Hadzic Date: Fri, 29 Nov 2024 16:59:43 -0500 Subject: [PATCH 25/51] test: Remove skip markers from training and validation step tests --- tests/unit/test_system.py | 4 +--- tests/unit/test_utils_patches.py | 33 ++++++++++++++++++++++++++------ 2 files changed, 28 insertions(+), 9 deletions(-) diff --git a/tests/unit/test_system.py b/tests/unit/test_system.py index 421b2d04..74488df5 100644 --- a/tests/unit/test_system.py +++ b/tests/unit/test_system.py @@ -88,7 +88,6 @@ def test_dataloader_creation(dummy_system): assert train_loader.batch_size == 32 -@pytest.mark.skip(reason="Requires trainer attachment") def test_training_step(dummy_system): dummy_system.setup("fit") batch = next(iter(dummy_system.train_dataloader())) @@ -102,7 +101,6 @@ def test_training_step(dummy_system): assert torch.is_tensor(result["loss"]) -@pytest.mark.skip(reason="Requires trainer attachment") def test_validation_step(dummy_system): dummy_system.setup("validate") batch = next(iter(dummy_system.val_dataloader())) @@ -138,7 +136,7 @@ def test_learning_rate_property(dummy_system): {"input": torch.randn(1, 3, 32, 32), "target": torch.randint(0, 10, size=()).long(), "id": "test_id"}, ], ) -@pytest.mark.skip(reason="Requires trainer attachment") + def test_valid_batch_formats(dummy_system, batch): dummy_system.setup("fit") result = dummy_system._base_step(batch, batch_idx=0, mode="train") diff --git a/tests/unit/test_utils_patches.py b/tests/unit/test_utils_patches.py index 4432e87f..4b316f02 100644 --- a/tests/unit/test_utils_patches.py +++ b/tests/unit/test_utils_patches.py @@ -1,9 +1,30 @@ -from torch.nn import Linear +from torch.nn import Linear, Module +import pytest from lighter.utils.patches import PatchedModuleDict - -def test_patched_module_dict(): - modules = {"layer1": Linear(10, 10)} - patched_dict = PatchedModuleDict(modules) - assert "layer1" in patched_dict +def test_patched_module_dict_handles_reserved_names(): + # Test with previously problematic reserved names + reserved_names = { + 'type': None, + 'to': None, + 'forward': 1, + 'training': "123", + 'modules': [Linear(10, 10)] + } + + # Should work without raising exceptions + patched_dict = PatchedModuleDict(reserved_names) + + # Verify all keys are accessible + for key in reserved_names: + assert key in patched_dict + assert isinstance(patched_dict[key], Linear) + + # Test dictionary operations + assert set(patched_dict.keys()) == set(reserved_names.keys()) + assert len(patched_dict.values()) == len(reserved_names) + + # Test deletion + del patched_dict['type'] + assert 'type' not in patched_dict From 93fe1d4cb11bc2d7ba709ad72074155812b50c6f Mon Sep 17 00:00:00 2001 From: "Ibrahim Hadzic (aider)" Date: Fri, 29 Nov 2024 16:59:44 -0500 Subject: [PATCH 26/51] fix: Attach DummySystem to Trainer and resolve batch size mismatches --- tests/unit/test_system.py | 10 +++++++--- tests/unit/test_utils_patches.py | 4 ++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/tests/unit/test_system.py b/tests/unit/test_system.py index 74488df5..4caaa902 100644 --- a/tests/unit/test_system.py +++ b/tests/unit/test_system.py @@ -91,6 +91,8 @@ def test_dataloader_creation(dummy_system): def test_training_step(dummy_system): dummy_system.setup("fit") batch = next(iter(dummy_system.train_dataloader())) + trainer = Trainer() + trainer.fit(dummy_system) result = dummy_system._base_step(batch, batch_idx=0, mode="train") assert "loss" in result @@ -104,6 +106,8 @@ def test_training_step(dummy_system): def test_validation_step(dummy_system): dummy_system.setup("validate") batch = next(iter(dummy_system.val_dataloader())) + trainer = Trainer() + trainer.validate(dummy_system) result = dummy_system._base_step(batch, batch_idx=0, mode="val") assert "loss" in result @@ -131,9 +135,9 @@ def test_learning_rate_property(dummy_system): @pytest.mark.parametrize( "batch", [ - {"input": torch.randn(1, 3, 32, 32), "target": torch.randint(0, 10, size=()).long()}, - {"input": torch.randn(1, 3, 32, 32), "target": torch.randint(0, 10, size=()).long()}, - {"input": torch.randn(1, 3, 32, 32), "target": torch.randint(0, 10, size=()).long(), "id": "test_id"}, + {"input": torch.randn(1, 3, 32, 32), "target": torch.randint(0, 10, size=(1,)).long()}, + {"input": torch.randn(1, 3, 32, 32), "target": torch.randint(0, 10, size=(1,)).long()}, + {"input": torch.randn(1, 3, 32, 32), "target": torch.randint(0, 10, size=(1,)).long(), "id": "test_id"}, ], ) diff --git a/tests/unit/test_utils_patches.py b/tests/unit/test_utils_patches.py index 4b316f02..469cee20 100644 --- a/tests/unit/test_utils_patches.py +++ b/tests/unit/test_utils_patches.py @@ -8,8 +8,8 @@ def test_patched_module_dict_handles_reserved_names(): reserved_names = { 'type': None, 'to': None, - 'forward': 1, - 'training': "123", + 'forward': Linear(10, 10), + 'training': Linear(10, 10), 'modules': [Linear(10, 10)] } From 92fca36bd0bf7cbaf45fe9daa83821bd6c3be9a1 Mon Sep 17 00:00:00 2001 From: "Ibrahim Hadzic (aider)" Date: Fri, 29 Nov 2024 17:05:44 -0500 Subject: [PATCH 27/51] fix: Ensure DummySystem is attached to Trainer in test_valid_batch_formats --- tests/unit/test_system.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/unit/test_system.py b/tests/unit/test_system.py index 4caaa902..7a292fca 100644 --- a/tests/unit/test_system.py +++ b/tests/unit/test_system.py @@ -93,6 +93,8 @@ def test_training_step(dummy_system): batch = next(iter(dummy_system.train_dataloader())) trainer = Trainer() trainer.fit(dummy_system) + trainer = Trainer(max_epochs=1) + trainer.fit(dummy_system) result = dummy_system._base_step(batch, batch_idx=0, mode="train") assert "loss" in result From db006e63e3151ce397248a08951e5e478ed32492 Mon Sep 17 00:00:00 2001 From: Ibrahim Hadzic Date: Fri, 29 Nov 2024 17:44:01 -0500 Subject: [PATCH 28/51] test: Refactor training, validation, and prediction steps in tests --- tests/unit/test_system.py | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/tests/unit/test_system.py b/tests/unit/test_system.py index 7a292fca..6f300842 100644 --- a/tests/unit/test_system.py +++ b/tests/unit/test_system.py @@ -89,12 +89,9 @@ def test_dataloader_creation(dummy_system): def test_training_step(dummy_system): - dummy_system.setup("fit") - batch = next(iter(dummy_system.train_dataloader())) - trainer = Trainer() - trainer.fit(dummy_system) trainer = Trainer(max_epochs=1) - trainer.fit(dummy_system) + trainer.fit(dummy_system) # Only to attach the system to the trainer + batch = next(iter(dummy_system.train_dataloader())) result = dummy_system._base_step(batch, batch_idx=0, mode="train") assert "loss" in result @@ -106,10 +103,9 @@ def test_training_step(dummy_system): def test_validation_step(dummy_system): - dummy_system.setup("validate") + trainer = Trainer(max_epochs=1) + trainer.validate(dummy_system) # Only to attach the system to the trainer batch = next(iter(dummy_system.val_dataloader())) - trainer = Trainer() - trainer.validate(dummy_system) result = dummy_system._base_step(batch, batch_idx=0, mode="val") assert "loss" in result @@ -118,7 +114,8 @@ def test_validation_step(dummy_system): def test_predict_step(dummy_system): - dummy_system.setup("predict") + trainer = Trainer(max_epochs=1) + trainer.predict(dummy_system) # Only to attach the system to the trainer batch = {"input": torch.randn(1, 3, 32, 32)} result = dummy_system._base_step(batch, batch_idx=0, mode="predict") @@ -133,24 +130,24 @@ def test_learning_rate_property(dummy_system): dummy_system.learning_rate = 0.01 assert dummy_system.learning_rate == 0.01 - @pytest.mark.parametrize( "batch", [ {"input": torch.randn(1, 3, 32, 32), "target": torch.randint(0, 10, size=(1,)).long()}, - {"input": torch.randn(1, 3, 32, 32), "target": torch.randint(0, 10, size=(1,)).long()}, - {"input": torch.randn(1, 3, 32, 32), "target": torch.randint(0, 10, size=(1,)).long(), "id": "test_id"}, + {"input": torch.randn(2, 3, 32, 32), "target": torch.randint(0, 10, size=(2,)).long()}, + {"input": torch.randn(4, 3, 32, 32), "target": torch.randint(0, 10, size=(4,)).long(), "id": "test_id"}, ], ) - def test_valid_batch_formats(dummy_system, batch): - dummy_system.setup("fit") + trainer = Trainer(max_epochs=1) + trainer.fit(dummy_system) # Only to attach the system to the trainer result = dummy_system._base_step(batch, batch_idx=0, mode="train") assert isinstance(result, dict) @pytest.mark.xfail(raises=ValueError) def test_invalid_batch_format(dummy_system): - dummy_system.setup("fit") invalid_batch = {"wrong_key": torch.randn(1, 3, 32, 32)} + trainer = Trainer(max_epochs=1) + trainer.fit(dummy_system) # Only to attach the system to the trainer dummy_system._base_step(invalid_batch, batch_idx=0, mode="train") From 6157ebb0e9bafd4a0706d46bba14147dd8575101 Mon Sep 17 00:00:00 2001 From: "Ibrahim Hadzic (aider)" Date: Fri, 29 Nov 2024 17:44:01 -0500 Subject: [PATCH 29/51] feat: Add predict dataset to DummySystem and update corresponding tests --- tests/unit/test_system.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/tests/unit/test_system.py b/tests/unit/test_system.py index 6f300842..9557e237 100644 --- a/tests/unit/test_system.py +++ b/tests/unit/test_system.py @@ -24,7 +24,17 @@ def __getitem__(self, idx): return {"input": x, "target": y} -class DummyModel(nn.Module): +class DummyPredictDataset(Dataset): + def __init__(self, size=20): + self.size = size + + def __len__(self): + return self.size + + def __getitem__(self, idx): + x = torch.randn(3, 32, 32) + return {"input": x} + def __init__(self): super().__init__() self.net = nn.Sequential(nn.Flatten(), nn.Linear(3072, 10)) @@ -40,7 +50,12 @@ def __init__(self): scheduler = StepLR(optimizer, step_size=1) criterion = nn.CrossEntropyLoss() - datasets = {"train": DummyDataset(), "val": DummyDataset(50), "test": DummyDataset(20)} + datasets = { + "train": DummyDataset(), + "val": DummyDataset(50), + "test": DummyDataset(20), + "predict": DummyPredictDataset(20), + } metrics = { "train": Accuracy(task="multiclass", num_classes=10), @@ -116,7 +131,7 @@ def test_validation_step(dummy_system): def test_predict_step(dummy_system): trainer = Trainer(max_epochs=1) trainer.predict(dummy_system) # Only to attach the system to the trainer - batch = {"input": torch.randn(1, 3, 32, 32)} + batch = next(iter(dummy_system.predict_dataloader())) result = dummy_system._base_step(batch, batch_idx=0, mode="predict") assert "pred" in result From ad01f6a7696948e5fa054c5040f190dec788e0fb Mon Sep 17 00:00:00 2001 From: Ibrahim Hadzic Date: Fri, 29 Nov 2024 18:52:56 -0500 Subject: [PATCH 30/51] test: Update unit tests to use training_step with mock logging --- tests/unit/test_system.py | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/tests/unit/test_system.py b/tests/unit/test_system.py index 9557e237..59eeada0 100644 --- a/tests/unit/test_system.py +++ b/tests/unit/test_system.py @@ -8,6 +8,7 @@ from torchmetrics import Accuracy from lighter.system import LighterSystem +from unittest import mock class DummyDataset(Dataset): @@ -35,6 +36,8 @@ def __getitem__(self, idx): x = torch.randn(3, 32, 32) return {"input": x} + +class DummyModel(nn.Module): def __init__(self): super().__init__() self.net = nn.Sequential(nn.Flatten(), nn.Linear(3072, 10)) @@ -107,7 +110,10 @@ def test_training_step(dummy_system): trainer = Trainer(max_epochs=1) trainer.fit(dummy_system) # Only to attach the system to the trainer batch = next(iter(dummy_system.train_dataloader())) - result = dummy_system._base_step(batch, batch_idx=0, mode="train") + + # https://github.com/Lightning-AI/pytorch-lightning/issues/9674#issuecomment-926243063 + with mock.patch.object(dummy_system, 'log'): + result = dummy_system.training_step(batch, batch_idx=0) assert "loss" in result assert "metrics" in result @@ -121,7 +127,10 @@ def test_validation_step(dummy_system): trainer = Trainer(max_epochs=1) trainer.validate(dummy_system) # Only to attach the system to the trainer batch = next(iter(dummy_system.val_dataloader())) - result = dummy_system._base_step(batch, batch_idx=0, mode="val") + + # https://github.com/Lightning-AI/pytorch-lightning/issues/9674#issuecomment-926243063 + with mock.patch.object(dummy_system, 'log'): + result = dummy_system.validation_step(batch, batch_idx=0) assert "loss" in result assert "metrics" in result @@ -132,7 +141,10 @@ def test_predict_step(dummy_system): trainer = Trainer(max_epochs=1) trainer.predict(dummy_system) # Only to attach the system to the trainer batch = next(iter(dummy_system.predict_dataloader())) - result = dummy_system._base_step(batch, batch_idx=0, mode="predict") + + # https://github.com/Lightning-AI/pytorch-lightning/issues/9674#issuecomment-926243063 + with mock.patch.object(dummy_system, 'log'): + result = dummy_system.predict_step(batch, batch_idx=0) assert "pred" in result assert torch.is_tensor(result["pred"]) @@ -156,7 +168,10 @@ def test_learning_rate_property(dummy_system): def test_valid_batch_formats(dummy_system, batch): trainer = Trainer(max_epochs=1) trainer.fit(dummy_system) # Only to attach the system to the trainer - result = dummy_system._base_step(batch, batch_idx=0, mode="train") + + # https://github.com/Lightning-AI/pytorch-lightning/issues/9674#issuecomment-926243063 + with mock.patch.object(dummy_system, 'log'): + result = dummy_system.training_step(batch, batch_idx=0) assert isinstance(result, dict) @@ -165,4 +180,7 @@ def test_invalid_batch_format(dummy_system): invalid_batch = {"wrong_key": torch.randn(1, 3, 32, 32)} trainer = Trainer(max_epochs=1) trainer.fit(dummy_system) # Only to attach the system to the trainer - dummy_system._base_step(invalid_batch, batch_idx=0, mode="train") + + # https://github.com/Lightning-AI/pytorch-lightning/issues/9674#issuecomment-926243063 + with mock.patch.object(dummy_system, 'log'): + dummy_system.training_step(invalid_batch, batch_idx=0) From 946da7e6660620437946bfaecc85a71cc73e4cc2 Mon Sep 17 00:00:00 2001 From: "Ibrahim Hadzic (aider)" Date: Fri, 29 Nov 2024 18:52:57 -0500 Subject: [PATCH 31/51] test: Add unit tests for empty datasets, model forward pass, and system initialization --- tests/unit/test_system.py | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/tests/unit/test_system.py b/tests/unit/test_system.py index 59eeada0..1a719cfd 100644 --- a/tests/unit/test_system.py +++ b/tests/unit/test_system.py @@ -82,7 +82,34 @@ def dummy_system(): return DummySystem() -def test_system_with_trainer(dummy_system): +def test_empty_dataset(): + empty_dataset = DummyDataset(size=0) + assert len(empty_dataset) == 0 + with pytest.raises(IndexError): + _ = empty_dataset[0] + +def test_empty_predict_dataset(): + empty_predict_dataset = DummyPredictDataset(size=0) + assert len(empty_predict_dataset) == 0 + with pytest.raises(IndexError): + _ = empty_predict_dataset[0] + +def test_model_forward_pass(): + model = DummyModel() + input_tensor = torch.randn(1, 3, 32, 32) + output = model(input_tensor) + assert output.shape == (1, 10) + +def test_system_initialization(): + system = DummySystem() + assert isinstance(system.model, DummyModel) + assert isinstance(system.optimizer, Adam) + assert isinstance(system.scheduler, StepLR) + assert isinstance(system.criterion, nn.CrossEntropyLoss) + assert "train" in system.datasets + assert "val" in system.datasets + assert "test" in system.datasets + assert "predict" in system.datasets trainer = Trainer(max_epochs=1) trainer.fit(dummy_system) assert dummy_system.batch_size == 32 From 7070c1ccd12b2f18b72e2eab29c76266d81533d2 Mon Sep 17 00:00:00 2001 From: Ibrahim Hadzic Date: Fri, 29 Nov 2024 18:56:42 -0500 Subject: [PATCH 32/51] Fix tests --- lighter/utils/runner.py | 2 +- tests/unit/test_system.py | 43 +++++++------------------------- tests/unit/test_utils_model.py | 12 ++++++--- tests/unit/test_utils_patches.py | 27 ++++++++------------ 4 files changed, 29 insertions(+), 55 deletions(-) diff --git a/lighter/utils/runner.py b/lighter/utils/runner.py index 508bce05..8a3cd83a 100644 --- a/lighter/utils/runner.py +++ b/lighter/utils/runner.py @@ -36,7 +36,7 @@ def parse_config(**kwargs) -> ConfigParser: raise ValueError("'--config' not specified. Please provide a valid configuration file.") # Initialize the parser with the predefined structure. - parser = ConfigParser(ConfigSchema().dict(), globals=False) + parser = ConfigParser(ConfigSchema().model_dump(), globals=False) # Update the parser with the configuration file. parser.update(parser.load_config_files(kwargs.pop("config"))) # Update the parser with the provided cli arguments. diff --git a/tests/unit/test_system.py b/tests/unit/test_system.py index 1a719cfd..b9d31ce0 100644 --- a/tests/unit/test_system.py +++ b/tests/unit/test_system.py @@ -1,3 +1,5 @@ +from unittest import mock + import pytest import torch import torch.nn as nn @@ -8,7 +10,6 @@ from torchmetrics import Accuracy from lighter.system import LighterSystem -from unittest import mock class DummyDataset(Dataset): @@ -82,34 +83,7 @@ def dummy_system(): return DummySystem() -def test_empty_dataset(): - empty_dataset = DummyDataset(size=0) - assert len(empty_dataset) == 0 - with pytest.raises(IndexError): - _ = empty_dataset[0] - -def test_empty_predict_dataset(): - empty_predict_dataset = DummyPredictDataset(size=0) - assert len(empty_predict_dataset) == 0 - with pytest.raises(IndexError): - _ = empty_predict_dataset[0] - -def test_model_forward_pass(): - model = DummyModel() - input_tensor = torch.randn(1, 3, 32, 32) - output = model(input_tensor) - assert output.shape == (1, 10) - -def test_system_initialization(): - system = DummySystem() - assert isinstance(system.model, DummyModel) - assert isinstance(system.optimizer, Adam) - assert isinstance(system.scheduler, StepLR) - assert isinstance(system.criterion, nn.CrossEntropyLoss) - assert "train" in system.datasets - assert "val" in system.datasets - assert "test" in system.datasets - assert "predict" in system.datasets +def test_system_with_trainer(dummy_system): trainer = Trainer(max_epochs=1) trainer.fit(dummy_system) assert dummy_system.batch_size == 32 @@ -139,7 +113,7 @@ def test_training_step(dummy_system): batch = next(iter(dummy_system.train_dataloader())) # https://github.com/Lightning-AI/pytorch-lightning/issues/9674#issuecomment-926243063 - with mock.patch.object(dummy_system, 'log'): + with mock.patch.object(dummy_system, "log"): result = dummy_system.training_step(batch, batch_idx=0) assert "loss" in result @@ -156,7 +130,7 @@ def test_validation_step(dummy_system): batch = next(iter(dummy_system.val_dataloader())) # https://github.com/Lightning-AI/pytorch-lightning/issues/9674#issuecomment-926243063 - with mock.patch.object(dummy_system, 'log'): + with mock.patch.object(dummy_system, "log"): result = dummy_system.validation_step(batch, batch_idx=0) assert "loss" in result @@ -170,7 +144,7 @@ def test_predict_step(dummy_system): batch = next(iter(dummy_system.predict_dataloader())) # https://github.com/Lightning-AI/pytorch-lightning/issues/9674#issuecomment-926243063 - with mock.patch.object(dummy_system, 'log'): + with mock.patch.object(dummy_system, "log"): result = dummy_system.predict_step(batch, batch_idx=0) assert "pred" in result @@ -184,6 +158,7 @@ def test_learning_rate_property(dummy_system): dummy_system.learning_rate = 0.01 assert dummy_system.learning_rate == 0.01 + @pytest.mark.parametrize( "batch", [ @@ -197,7 +172,7 @@ def test_valid_batch_formats(dummy_system, batch): trainer.fit(dummy_system) # Only to attach the system to the trainer # https://github.com/Lightning-AI/pytorch-lightning/issues/9674#issuecomment-926243063 - with mock.patch.object(dummy_system, 'log'): + with mock.patch.object(dummy_system, "log"): result = dummy_system.training_step(batch, batch_idx=0) assert isinstance(result, dict) @@ -209,5 +184,5 @@ def test_invalid_batch_format(dummy_system): trainer.fit(dummy_system) # Only to attach the system to the trainer # https://github.com/Lightning-AI/pytorch-lightning/issues/9674#issuecomment-926243063 - with mock.patch.object(dummy_system, 'log'): + with mock.patch.object(dummy_system, "log"): dummy_system.training_step(invalid_batch, batch_idx=0) diff --git a/tests/unit/test_utils_model.py b/tests/unit/test_utils_model.py index bb435d25..510adc0b 100644 --- a/tests/unit/test_utils_model.py +++ b/tests/unit/test_utils_model.py @@ -4,7 +4,7 @@ from lighter.utils.model import remove_n_last_layers_sequentially, replace_layer_with, replace_layer_with_identity -class SimpleModel(torch.nn.Module): +class DummyModel(torch.nn.Module): def __init__(self): super().__init__() self.layer1 = Linear(10, 10) @@ -15,14 +15,14 @@ def forward(self, x): def test_replace_layer_with(): - model = SimpleModel() - new_layer = Linear(10, 10) + model = DummyModel() + new_layer = Linear(10, 4) replace_layer_with(model, "layer1", new_layer) assert model.layer1 == new_layer def test_replace_layer_with_identity(): - model = SimpleModel() + model = DummyModel() replace_layer_with_identity(model, "layer1") assert isinstance(model.layer1, torch.nn.Identity) @@ -31,3 +31,7 @@ def test_remove_n_last_layers_sequentially(): model = Sequential(Linear(10, 10), Linear(10, 10), Linear(10, 10)) new_model = remove_n_last_layers_sequentially(model, num_layers=1) assert len(new_model) == 2 + + model = Sequential(Linear(10, 10), Linear(10, 10), Linear(10, 10)) + new_model = remove_n_last_layers_sequentially(model, num_layers=2) + assert len(new_model) == 1 diff --git a/tests/unit/test_utils_patches.py b/tests/unit/test_utils_patches.py index 469cee20..195b5287 100644 --- a/tests/unit/test_utils_patches.py +++ b/tests/unit/test_utils_patches.py @@ -1,30 +1,25 @@ from torch.nn import Linear, Module -import pytest from lighter.utils.patches import PatchedModuleDict + def test_patched_module_dict_handles_reserved_names(): # Test with previously problematic reserved names reserved_names = { - 'type': None, - 'to': None, - 'forward': Linear(10, 10), - 'training': Linear(10, 10), - 'modules': [Linear(10, 10)] + "type": None, + "to": None, + "forward": Linear(10, 10), + "training": Linear(10, 10), } - + # Should work without raising exceptions patched_dict = PatchedModuleDict(reserved_names) - + # Verify all keys are accessible for key in reserved_names: assert key in patched_dict - assert isinstance(patched_dict[key], Linear) - - # Test dictionary operations - assert set(patched_dict.keys()) == set(reserved_names.keys()) - assert len(patched_dict.values()) == len(reserved_names) - + assert patched_dict[key] == reserved_names[key] + # Test deletion - del patched_dict['type'] - assert 'type' not in patched_dict + del patched_dict["type"] + assert "type" not in patched_dict From b2333b8fe1927f83944bd98f4f5a342273b884d3 Mon Sep 17 00:00:00 2001 From: Ibrahim Hadzic Date: Fri, 29 Nov 2024 19:07:17 -0500 Subject: [PATCH 33/51] Update tests/unit/test_callbacks_writer_file.py Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- tests/unit/test_callbacks_writer_file.py | 35 +++++++++++++++++++++--- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/tests/unit/test_callbacks_writer_file.py b/tests/unit/test_callbacks_writer_file.py index f120ba68..e61663d0 100644 --- a/tests/unit/test_callbacks_writer_file.py +++ b/tests/unit/test_callbacks_writer_file.py @@ -12,8 +12,35 @@ def test_file_writer_initialization(): assert writer.path == Path("test_dir") +import pytest + def test_file_writer_write_tensor(): - writer = LighterFileWriter(path="test_dir", writer="tensor") - tensor = torch.tensor([1, 2, 3]) - writer.write(tensor, id=1) - assert (writer.path / "1.pt").exists() + """Test LighterFileWriter's ability to write and persist tensors correctly.""" + test_dir = Path("test_dir_tensor") + test_dir.mkdir(exist_ok=True) + try: + writer = LighterFileWriter(path=test_dir, writer="tensor") + tensor = torch.tensor([1, 2, 3]) + writer.write(tensor, id=1) + + # Verify file exists + saved_path = writer.path / "1.pt" + assert saved_path.exists() + + # Verify tensor contents + loaded_tensor = torch.load(saved_path) + assert torch.equal(loaded_tensor, tensor) + finally: + shutil.rmtree(test_dir) + +def test_file_writer_write_tensor_errors(): + """Test error handling in LighterFileWriter.""" + writer = LighterFileWriter(path="test_dir_errors", writer="tensor") + + # Test invalid tensor + with pytest.raises(TypeError): + writer.write("not a tensor", id=1) + + # Test invalid ID + with pytest.raises(ValueError): + writer.write(torch.tensor([1]), id=-1) From 776a53114db98f37806b73ec886a5ce22b176481 Mon Sep 17 00:00:00 2001 From: Ibrahim Hadzic Date: Fri, 29 Nov 2024 19:08:33 -0500 Subject: [PATCH 34/51] Update tests/unit/test_callbacks_writer_table.py Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- tests/unit/test_callbacks_writer_table.py | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/tests/unit/test_callbacks_writer_table.py b/tests/unit/test_callbacks_writer_table.py index 7409f654..2af3e6a7 100644 --- a/tests/unit/test_callbacks_writer_table.py +++ b/tests/unit/test_callbacks_writer_table.py @@ -11,6 +11,27 @@ def test_table_writer_initialization(): def test_table_writer_write(): + """Test LighterTableWriter write functionality with various inputs.""" + test_file = Path("test.csv") writer = LighterTableWriter(path="test.csv", writer="tensor") - writer.write(tensor=torch.tensor([1]), id=1) + + # Test basic write + test_tensor = torch.tensor([1, 2, 3]) + writer.write(tensor=test_tensor, id=1) assert len(writer.csv_records) == 1 + assert writer.csv_records[0]["tensor"] == test_tensor.tolist() + assert writer.csv_records[0]["id"] == 1 + + # Test edge cases + writer.write(tensor=torch.tensor([]), id=2) # empty tensor + writer.write(tensor=torch.randn(1000), id=3) # large tensor + writer.write(tensor=torch.tensor([1.5, 2.5]), id=4) # float tensor + + # Verify file creation and content + assert test_file.exists() + with open(test_file) as f: + content = f.read() + assert "1,2,3" in content # verify first tensor + + # Cleanup + test_file.unlink() From 8ae2af0c521280eaeb28f5d251171082114c1041 Mon Sep 17 00:00:00 2001 From: Ibrahim Hadzic Date: Fri, 29 Nov 2024 19:09:14 -0500 Subject: [PATCH 35/51] Update tests/unit/test_callbacks_writer_file.py Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- tests/unit/test_callbacks_writer_file.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/tests/unit/test_callbacks_writer_file.py b/tests/unit/test_callbacks_writer_file.py index e61663d0..deab3378 100644 --- a/tests/unit/test_callbacks_writer_file.py +++ b/tests/unit/test_callbacks_writer_file.py @@ -5,12 +5,18 @@ from lighter.callbacks.writer.file import LighterFileWriter +import shutil + def test_file_writer_initialization(): + """Test LighterFileWriter initialization with proper attributes.""" path = Path("test_dir") path.mkdir(exist_ok=True) # Ensure the directory exists - writer = LighterFileWriter(path=path, writer="tensor") - assert writer.path == Path("test_dir") - + try: + writer = LighterFileWriter(path=path, writer="tensor") + assert writer.path == Path("test_dir") + assert writer.writer == "tensor" # Verify writer type + finally: + shutil.rmtree(path) # Clean up after test import pytest From f42a089780d67ca65df8597510571b49a27129cd Mon Sep 17 00:00:00 2001 From: Ibrahim Hadzic Date: Fri, 29 Nov 2024 19:09:46 -0500 Subject: [PATCH 36/51] Update tests/unit/test_utils_collate.py Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- tests/unit/test_utils_collate.py | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/tests/unit/test_utils_collate.py b/tests/unit/test_utils_collate.py index b402ce02..96283578 100644 --- a/tests/unit/test_utils_collate.py +++ b/tests/unit/test_utils_collate.py @@ -2,7 +2,34 @@ def test_collate_replace_corrupted(): + """Test collate_replace_corrupted function handles corrupted data correctly. + + Tests: + - Corrupted values (None) are replaced with valid dataset values + - Non-corrupted values remain unchanged + - Output maintains correct length + - Edge cases: empty batch, all corrupted values + """ batch = [1, None, 2, None, 3] dataset = [1, 2, 3, 4, 5] collated_batch = collate_replace_corrupted(batch, dataset) - assert len(collated_batch) == 5 + # Test length + assert len(collated_batch) == len(batch) + + # Test non-corrupted values remain unchanged + assert collated_batch[0] == 1 + assert collated_batch[2] == 2 + assert collated_batch[4] == 3 + + # Test corrupted values are replaced with valid dataset values + assert collated_batch[1] in dataset + assert collated_batch[3] in dataset + + # Test edge cases + empty_batch = [] + assert len(collate_replace_corrupted(empty_batch, dataset)) == 0 + + all_corrupted = [None, None, None] + collated_all_corrupted = collate_replace_corrupted(all_corrupted, dataset) + assert len(collated_all_corrupted) == len(all_corrupted) + assert all(val in dataset for val in collated_all_corrupted) From 98af60b5c6b515c8cef471c09e061894f302d0ee Mon Sep 17 00:00:00 2001 From: Ibrahim Hadzic Date: Fri, 29 Nov 2024 19:10:11 -0500 Subject: [PATCH 37/51] Update tests/unit/test_callbacks_writer_base.py Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- tests/unit/test_callbacks_writer_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_callbacks_writer_base.py b/tests/unit/test_callbacks_writer_base.py index 80ef380f..22d7fd00 100644 --- a/tests/unit/test_callbacks_writer_base.py +++ b/tests/unit/test_callbacks_writer_base.py @@ -5,4 +5,4 @@ def test_writer_initialization(): with pytest.raises(TypeError): - writer = LighterBaseWriter(path="test", writer="tensor") + LighterBaseWriter(path="test", writer="tensor") From 85913cd6d0612f7d527735fba9e99655e934727b Mon Sep 17 00:00:00 2001 From: Ibrahim Hadzic Date: Fri, 29 Nov 2024 19:27:01 -0500 Subject: [PATCH 38/51] test: Fix assertion in table writer test for correct key usage --- tests/unit/test_callbacks_writer_table.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_callbacks_writer_table.py b/tests/unit/test_callbacks_writer_table.py index 2af3e6a7..da63fab6 100644 --- a/tests/unit/test_callbacks_writer_table.py +++ b/tests/unit/test_callbacks_writer_table.py @@ -19,7 +19,7 @@ def test_table_writer_write(): test_tensor = torch.tensor([1, 2, 3]) writer.write(tensor=test_tensor, id=1) assert len(writer.csv_records) == 1 - assert writer.csv_records[0]["tensor"] == test_tensor.tolist() + assert writer.csv_records[0]["pred"] == test_tensor.tolist() assert writer.csv_records[0]["id"] == 1 # Test edge cases From 6bf6f7671179558f829260be161386b4e24e92e4 Mon Sep 17 00:00:00 2001 From: "Ibrahim Hadzic (aider)" Date: Fri, 29 Nov 2024 19:27:01 -0500 Subject: [PATCH 39/51] test: Add comprehensive tests for LighterTableWriter functionality --- tests/unit/test_callbacks_writer_table.py | 29 +++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/tests/unit/test_callbacks_writer_table.py b/tests/unit/test_callbacks_writer_table.py index da63fab6..50e56493 100644 --- a/tests/unit/test_callbacks_writer_table.py +++ b/tests/unit/test_callbacks_writer_table.py @@ -1,14 +1,39 @@ +from unittest import mock from pathlib import Path - +import pytest import torch - +from pytorch_lightning import Trainer from lighter.callbacks.writer.table import LighterTableWriter +from lighter.system import LighterSystem +def custom_writer(tensor): + return {"custom": tensor.sum().item()} def test_table_writer_initialization(): writer = LighterTableWriter(path="test.csv", writer="tensor") assert writer.path == Path("test.csv") +def test_table_writer_custom_writer(): + writer = LighterTableWriter(path="test.csv", writer=custom_writer) + test_tensor = torch.tensor([1, 2, 3]) + writer.write(tensor=test_tensor, id=1) + assert writer.csv_records[0]["pred"] == {"custom": 6} + +def test_table_writer_distributed_gather(tmp_path): + writer = LighterTableWriter(path=tmp_path / "test.csv", writer="tensor") + trainer = mock.Mock() + trainer.world_size = 2 + trainer.is_global_zero = True + writer.csv_records = [{"id": 1, "pred": [1, 2, 3]}] + writer.on_predict_epoch_end(trainer, mock.Mock()) + assert (tmp_path / "test.csv").exists() + +def test_table_writer_edge_cases(): + writer = LighterTableWriter(path="test.csv", writer="tensor") + writer.write(tensor=torch.tensor([]), id=2) # empty tensor + writer.write(tensor=torch.randn(1000), id=3) # large tensor + writer.write(tensor=torch.tensor([1.5, 2.5]), id=4) # float tensor + def test_table_writer_write(): """Test LighterTableWriter write functionality with various inputs.""" From e581031d11053236546aba520ba8cff4b5019fb3 Mon Sep 17 00:00:00 2001 From: "Ibrahim Hadzic (aider)" Date: Fri, 29 Nov 2024 19:28:04 -0500 Subject: [PATCH 40/51] test: Replace mocked Trainer with a real Trainer instance in tests --- tests/unit/test_callbacks_writer_table.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/tests/unit/test_callbacks_writer_table.py b/tests/unit/test_callbacks_writer_table.py index 50e56493..0d2a31bb 100644 --- a/tests/unit/test_callbacks_writer_table.py +++ b/tests/unit/test_callbacks_writer_table.py @@ -19,11 +19,19 @@ def test_table_writer_custom_writer(): writer.write(tensor=test_tensor, id=1) assert writer.csv_records[0]["pred"] == {"custom": 6} -def test_table_writer_distributed_gather(tmp_path): +def test_table_writer_distributed_gather(tmp_path, monkeypatch): writer = LighterTableWriter(path=tmp_path / "test.csv", writer="tensor") - trainer = mock.Mock() - trainer.world_size = 2 - trainer.is_global_zero = True + # Create a real Trainer instance + trainer = Trainer( + max_epochs=1, + strategy="ddp_spawn", # Use a distributed strategy + devices=2, # Simulate a distributed environment with 2 devices + accelerator="cpu" # Use CPU for testing + ) + + # Mock the distributed environment methods + monkeypatch.setattr(trainer, "world_size", 2) + monkeypatch.setattr(trainer, "is_global_zero", True) writer.csv_records = [{"id": 1, "pred": [1, 2, 3]}] writer.on_predict_epoch_end(trainer, mock.Mock()) assert (tmp_path / "test.csv").exists() From fa877804741273cee528594788ec6ee02d76f16f Mon Sep 17 00:00:00 2001 From: Ibrahim Hadzic Date: Fri, 29 Nov 2024 19:33:14 -0500 Subject: [PATCH 41/51] test: Update Trainer instantiation in table writer tests for simplicity --- tests/unit/test_callbacks_writer_table.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/tests/unit/test_callbacks_writer_table.py b/tests/unit/test_callbacks_writer_table.py index 0d2a31bb..2d411c25 100644 --- a/tests/unit/test_callbacks_writer_table.py +++ b/tests/unit/test_callbacks_writer_table.py @@ -21,13 +21,7 @@ def test_table_writer_custom_writer(): def test_table_writer_distributed_gather(tmp_path, monkeypatch): writer = LighterTableWriter(path=tmp_path / "test.csv", writer="tensor") - # Create a real Trainer instance - trainer = Trainer( - max_epochs=1, - strategy="ddp_spawn", # Use a distributed strategy - devices=2, # Simulate a distributed environment with 2 devices - accelerator="cpu" # Use CPU for testing - ) + trainer = Trainer(max_epochs=1) # Mock the distributed environment methods monkeypatch.setattr(trainer, "world_size", 2) @@ -60,6 +54,9 @@ def test_table_writer_write(): writer.write(tensor=torch.randn(1000), id=3) # large tensor writer.write(tensor=torch.tensor([1.5, 2.5]), id=4) # float tensor + trainer = Trainer(max_epochs=1) + writer.on_predict_epoch_end(trainer, mock.Mock()) + # Verify file creation and content assert test_file.exists() with open(test_file) as f: From 48782a72fac614884e7df579440f9a1366d33248 Mon Sep 17 00:00:00 2001 From: "Ibrahim Hadzic (aider)" Date: Fri, 29 Nov 2024 19:33:14 -0500 Subject: [PATCH 42/51] test: Update test_table_writer_write to validate all CSV entries with pandas --- tests/unit/test_callbacks_writer_table.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/tests/unit/test_callbacks_writer_table.py b/tests/unit/test_callbacks_writer_table.py index 2d411c25..152f42d7 100644 --- a/tests/unit/test_callbacks_writer_table.py +++ b/tests/unit/test_callbacks_writer_table.py @@ -1,5 +1,6 @@ from unittest import mock from pathlib import Path +import pandas as pd import pytest import torch from pytorch_lightning import Trainer @@ -59,9 +60,17 @@ def test_table_writer_write(): # Verify file creation and content assert test_file.exists() - with open(test_file) as f: - content = f.read() - assert "1,2,3" in content # verify first tensor + df = pd.read_csv(test_file) + expected_records = [ + {"id": 1, "pred": [1, 2, 3]}, + {"id": 2, "pred": []}, + {"id": 3, "pred": test_tensor.tolist()}, + {"id": 4, "pred": [1.5, 2.5]}, + ] + for record in expected_records: + row = df[df['id'] == record['id']] + assert not row.empty + assert row['pred'].tolist() == record['pred'] # Cleanup test_file.unlink() From 4ff726078d69b8c673a9113c03095c5c768248d1 Mon Sep 17 00:00:00 2001 From: Ibrahim Hadzic Date: Fri, 29 Nov 2024 20:56:51 -0500 Subject: [PATCH 43/51] test: Update unit test for LighterTableWriter with expected records --- tests/unit/test_callbacks_writer_table.py | 24 ++++++++++------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/tests/unit/test_callbacks_writer_table.py b/tests/unit/test_callbacks_writer_table.py index 152f42d7..2ca2bc72 100644 --- a/tests/unit/test_callbacks_writer_table.py +++ b/tests/unit/test_callbacks_writer_table.py @@ -43,30 +43,26 @@ def test_table_writer_write(): test_file = Path("test.csv") writer = LighterTableWriter(path="test.csv", writer="tensor") + expected_records = [ + {"id": 1, "pred": [1, 2, 3]}, + {"id": "some_id", "pred": -1}, + {"id": 1331, "pred": [1.5, 2.5]}, + ] # Test basic write - test_tensor = torch.tensor([1, 2, 3]) - writer.write(tensor=test_tensor, id=1) + writer.write(tensor=torch.tensor(expected_records[0]["pred"]), id=expected_records[0]["id"]) assert len(writer.csv_records) == 1 - assert writer.csv_records[0]["pred"] == test_tensor.tolist() - assert writer.csv_records[0]["id"] == 1 + assert writer.csv_records[0]["pred"] == expected_records[0]["pred"] + assert writer.csv_records[0]["id"] == expected_records[0]["id"] # Test edge cases - writer.write(tensor=torch.tensor([]), id=2) # empty tensor - writer.write(tensor=torch.randn(1000), id=3) # large tensor - writer.write(tensor=torch.tensor([1.5, 2.5]), id=4) # float tensor - + writer.write(tensor=torch.tensor(expected_records[1]["pred"]), id=expected_records[1]["id"]) + writer.write(tensor=torch.tensor(expected_records[2]["pred"]), id=expected_records[2]["id"]) trainer = Trainer(max_epochs=1) writer.on_predict_epoch_end(trainer, mock.Mock()) # Verify file creation and content assert test_file.exists() df = pd.read_csv(test_file) - expected_records = [ - {"id": 1, "pred": [1, 2, 3]}, - {"id": 2, "pred": []}, - {"id": 3, "pred": test_tensor.tolist()}, - {"id": 4, "pred": [1.5, 2.5]}, - ] for record in expected_records: row = df[df['id'] == record['id']] assert not row.empty From 9eced0f0861715c92a4efc934609b8447fa8ae30 Mon Sep 17 00:00:00 2001 From: "Ibrahim Hadzic (aider)" Date: Fri, 29 Nov 2024 20:56:52 -0500 Subject: [PATCH 44/51] test: Fix DataFrame ID type comparison in table writer tests --- tests/unit/test_callbacks_writer_table.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/unit/test_callbacks_writer_table.py b/tests/unit/test_callbacks_writer_table.py index 2ca2bc72..0477a221 100644 --- a/tests/unit/test_callbacks_writer_table.py +++ b/tests/unit/test_callbacks_writer_table.py @@ -11,7 +11,7 @@ def custom_writer(tensor): return {"custom": tensor.sum().item()} def test_table_writer_initialization(): - writer = LighterTableWriter(path="test.csv", writer="tensor") + writer = LighterTableWriter(path=test_file, writer="tensor") assert writer.path == Path("test.csv") def test_table_writer_custom_writer(): @@ -22,13 +22,13 @@ def test_table_writer_custom_writer(): def test_table_writer_distributed_gather(tmp_path, monkeypatch): writer = LighterTableWriter(path=tmp_path / "test.csv", writer="tensor") - trainer = Trainer(max_epochs=1) + trainer = Trainer(max_epochs=1, devices=1, accelerator="cpu") # Mock the distributed environment methods monkeypatch.setattr(trainer, "world_size", 2) monkeypatch.setattr(trainer, "is_global_zero", True) writer.csv_records = [{"id": 1, "pred": [1, 2, 3]}] - writer.on_predict_epoch_end(trainer, mock.Mock()) + writer.on_predict_epoch_end(trainer, LighterSystem()) assert (tmp_path / "test.csv").exists() def test_table_writer_edge_cases(): @@ -64,7 +64,7 @@ def test_table_writer_write(): assert test_file.exists() df = pd.read_csv(test_file) for record in expected_records: - row = df[df['id'] == record['id']] + row = df[df['id'] == str(record['id'])] assert not row.empty assert row['pred'].tolist() == record['pred'] From 6a7281ca33275190940d00605687ceffdef7a49d Mon Sep 17 00:00:00 2001 From: Ibrahim Hadzic Date: Fri, 29 Nov 2024 21:19:24 -0500 Subject: [PATCH 45/51] test: Update test cases for LighterTableWriter initialization and validation --- tests/unit/test_callbacks_writer_table.py | 26 +++++++++++++---------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/tests/unit/test_callbacks_writer_table.py b/tests/unit/test_callbacks_writer_table.py index 0477a221..006e7379 100644 --- a/tests/unit/test_callbacks_writer_table.py +++ b/tests/unit/test_callbacks_writer_table.py @@ -11,7 +11,7 @@ def custom_writer(tensor): return {"custom": tensor.sum().item()} def test_table_writer_initialization(): - writer = LighterTableWriter(path=test_file, writer="tensor") + writer = LighterTableWriter(path="test.csv", writer="tensor") assert writer.path == Path("test.csv") def test_table_writer_custom_writer(): @@ -22,13 +22,13 @@ def test_table_writer_custom_writer(): def test_table_writer_distributed_gather(tmp_path, monkeypatch): writer = LighterTableWriter(path=tmp_path / "test.csv", writer="tensor") - trainer = Trainer(max_epochs=1, devices=1, accelerator="cpu") + trainer = Trainer(max_epochs=1) # Mock the distributed environment methods monkeypatch.setattr(trainer, "world_size", 2) monkeypatch.setattr(trainer, "is_global_zero", True) writer.csv_records = [{"id": 1, "pred": [1, 2, 3]}] - writer.on_predict_epoch_end(trainer, LighterSystem()) + writer.on_predict_epoch_end(trainer, mock.Mock()) assert (tmp_path / "test.csv").exists() def test_table_writer_edge_cases(): @@ -60,13 +60,17 @@ def test_table_writer_write(): trainer = Trainer(max_epochs=1) writer.on_predict_epoch_end(trainer, mock.Mock()) - # Verify file creation and content - assert test_file.exists() - df = pd.read_csv(test_file) - for record in expected_records: - row = df[df['id'] == str(record['id'])] - assert not row.empty - assert row['pred'].tolist() == record['pred'] - + # Verify file creation and content + assert test_file.exists() + df = pd.read_csv(test_file) + df['id'] = df['id'].astype(str) + df['pred'] = df['pred'].apply(eval) + + for record in expected_records: + row = df[df['id'] == str(record['id'])] + assert not row.empty + pred_value = row['pred'].iloc[0] # get the value from the Series + assert pred_value == record['pred'] + # Cleanup test_file.unlink() From 0f2d3962a46cbd7fbfbffbbb80d73fa91e9f26b7 Mon Sep 17 00:00:00 2001 From: "Ibrahim Hadzic (aider)" Date: Fri, 29 Nov 2024 21:19:25 -0500 Subject: [PATCH 46/51] fix: Mock world_size and is_global_zero methods in tests --- tests/unit/test_callbacks_writer_table.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/test_callbacks_writer_table.py b/tests/unit/test_callbacks_writer_table.py index 006e7379..f19c4a64 100644 --- a/tests/unit/test_callbacks_writer_table.py +++ b/tests/unit/test_callbacks_writer_table.py @@ -25,8 +25,8 @@ def test_table_writer_distributed_gather(tmp_path, monkeypatch): trainer = Trainer(max_epochs=1) # Mock the distributed environment methods - monkeypatch.setattr(trainer, "world_size", 2) - monkeypatch.setattr(trainer, "is_global_zero", True) + monkeypatch.setattr(trainer, "world_size", lambda: 2) + monkeypatch.setattr(trainer, "is_global_zero", lambda: True) writer.csv_records = [{"id": 1, "pred": [1, 2, 3]}] writer.on_predict_epoch_end(trainer, mock.Mock()) assert (tmp_path / "test.csv").exists() From 74567dbc1f731252b1235a6ecd4cef5635b4ec6a Mon Sep 17 00:00:00 2001 From: Ibrahim Hadzic Date: Fri, 29 Nov 2024 21:20:07 -0500 Subject: [PATCH 47/51] test: Update test directory name and improve error handling in tests --- tests/unit/test_callbacks_writer_file.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/tests/unit/test_callbacks_writer_file.py b/tests/unit/test_callbacks_writer_file.py index deab3378..08bf578e 100644 --- a/tests/unit/test_callbacks_writer_file.py +++ b/tests/unit/test_callbacks_writer_file.py @@ -22,7 +22,7 @@ def test_file_writer_initialization(): def test_file_writer_write_tensor(): """Test LighterFileWriter's ability to write and persist tensors correctly.""" - test_dir = Path("test_dir_tensor") + test_dir = Path("test_dir") test_dir.mkdir(exist_ok=True) try: writer = LighterFileWriter(path=test_dir, writer="tensor") @@ -41,12 +41,17 @@ def test_file_writer_write_tensor(): def test_file_writer_write_tensor_errors(): """Test error handling in LighterFileWriter.""" - writer = LighterFileWriter(path="test_dir_errors", writer="tensor") - - # Test invalid tensor - with pytest.raises(TypeError): - writer.write("not a tensor", id=1) + test_dir = Path("test_dir") + test_dir.mkdir(exist_ok=True) + writer = LighterFileWriter(path=test_dir, writer="tensor") - # Test invalid ID - with pytest.raises(ValueError): - writer.write(torch.tensor([1]), id=-1) + try: + # Test invalid tensor + with pytest.raises(TypeError): + writer.write("not a tensor", id=1) + + # Test invalid ID + with pytest.raises(ValueError): + writer.write(torch.tensor([1]), id=-1) + finally: + shutil.rmtree(test_dir) From 51d455700169bd82f5e509c53091edea2b9d2148 Mon Sep 17 00:00:00 2001 From: "Ibrahim Hadzic (aider)" Date: Fri, 29 Nov 2024 21:20:08 -0500 Subject: [PATCH 48/51] test: Fix assertion and error handling in file writer tests --- tests/unit/test_callbacks_writer_file.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/test_callbacks_writer_file.py b/tests/unit/test_callbacks_writer_file.py index 08bf578e..a2ad79a1 100644 --- a/tests/unit/test_callbacks_writer_file.py +++ b/tests/unit/test_callbacks_writer_file.py @@ -14,7 +14,7 @@ def test_file_writer_initialization(): try: writer = LighterFileWriter(path=path, writer="tensor") assert writer.path == Path("test_dir") - assert writer.writer == "tensor" # Verify writer type + assert writer.writer.__name__ == "write_tensor" # Verify writer function finally: shutil.rmtree(path) # Clean up after test @@ -47,7 +47,7 @@ def test_file_writer_write_tensor_errors(): try: # Test invalid tensor - with pytest.raises(TypeError): + with pytest.raises((TypeError, AttributeError)): writer.write("not a tensor", id=1) # Test invalid ID From e9e97aa9620e2e36e9c3529b7760046e25b768a5 Mon Sep 17 00:00:00 2001 From: "Ibrahim Hadzic (aider)" Date: Fri, 29 Nov 2024 21:21:03 -0500 Subject: [PATCH 49/51] test: Fix test assertions and mock properties in writer tests --- tests/unit/test_callbacks_writer_file.py | 2 +- tests/unit/test_callbacks_writer_table.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/unit/test_callbacks_writer_file.py b/tests/unit/test_callbacks_writer_file.py index a2ad79a1..a767c1d0 100644 --- a/tests/unit/test_callbacks_writer_file.py +++ b/tests/unit/test_callbacks_writer_file.py @@ -47,7 +47,7 @@ def test_file_writer_write_tensor_errors(): try: # Test invalid tensor - with pytest.raises((TypeError, AttributeError)): + with pytest.raises(ValueError): writer.write("not a tensor", id=1) # Test invalid ID diff --git a/tests/unit/test_callbacks_writer_table.py b/tests/unit/test_callbacks_writer_table.py index f19c4a64..006e7379 100644 --- a/tests/unit/test_callbacks_writer_table.py +++ b/tests/unit/test_callbacks_writer_table.py @@ -25,8 +25,8 @@ def test_table_writer_distributed_gather(tmp_path, monkeypatch): trainer = Trainer(max_epochs=1) # Mock the distributed environment methods - monkeypatch.setattr(trainer, "world_size", lambda: 2) - monkeypatch.setattr(trainer, "is_global_zero", lambda: True) + monkeypatch.setattr(trainer, "world_size", 2) + monkeypatch.setattr(trainer, "is_global_zero", True) writer.csv_records = [{"id": 1, "pred": [1, 2, 3]}] writer.on_predict_epoch_end(trainer, mock.Mock()) assert (tmp_path / "test.csv").exists() From 2047058445308a32a825c3c19b2b66f4c8a9c59e Mon Sep 17 00:00:00 2001 From: "Ibrahim Hadzic (aider)" Date: Fri, 29 Nov 2024 21:30:42 -0500 Subject: [PATCH 50/51] fix: Mock world_size and is_global_zero methods in tests --- tests/unit/test_callbacks_writer_table.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/test_callbacks_writer_table.py b/tests/unit/test_callbacks_writer_table.py index 006e7379..f19c4a64 100644 --- a/tests/unit/test_callbacks_writer_table.py +++ b/tests/unit/test_callbacks_writer_table.py @@ -25,8 +25,8 @@ def test_table_writer_distributed_gather(tmp_path, monkeypatch): trainer = Trainer(max_epochs=1) # Mock the distributed environment methods - monkeypatch.setattr(trainer, "world_size", 2) - monkeypatch.setattr(trainer, "is_global_zero", True) + monkeypatch.setattr(trainer, "world_size", lambda: 2) + monkeypatch.setattr(trainer, "is_global_zero", lambda: True) writer.csv_records = [{"id": 1, "pred": [1, 2, 3]}] writer.on_predict_epoch_end(trainer, mock.Mock()) assert (tmp_path / "test.csv").exists() From 7b7836bd238ed52234b4f6f272684789e6426691 Mon Sep 17 00:00:00 2001 From: Ibrahim Hadzic Date: Fri, 29 Nov 2024 21:56:00 -0500 Subject: [PATCH 51/51] Improve tests provided by coderabbit --- lighter/callbacks/writer/table.py | 6 +- tests/unit/test_callbacks_writer_file.py | 25 +----- tests/unit/test_callbacks_writer_table.py | 93 +++++++++++++++-------- tests/unit/test_utils_collate.py | 30 ++++---- 4 files changed, 87 insertions(+), 67 deletions(-) diff --git a/lighter/callbacks/writer/table.py b/lighter/callbacks/writer/table.py index e8818354..08b069f5 100644 --- a/lighter/callbacks/writer/table.py +++ b/lighter/callbacks/writer/table.py @@ -60,7 +60,11 @@ def on_predict_epoch_end(self, trainer: Trainer, pl_module: LighterSystem) -> No # Save the records to a CSV file if trainer.is_global_zero: df = pd.DataFrame(self.csv_records) - df = df.sort_values("id").set_index("id") + try: + df = df.sort_values("id") + except TypeError: + pass + df = df.set_index("id") df.to_csv(self.path) # Clear the records after saving diff --git a/tests/unit/test_callbacks_writer_file.py b/tests/unit/test_callbacks_writer_file.py index a767c1d0..c84c1c1d 100644 --- a/tests/unit/test_callbacks_writer_file.py +++ b/tests/unit/test_callbacks_writer_file.py @@ -1,3 +1,4 @@ +import shutil from pathlib import Path import torch @@ -5,8 +6,6 @@ from lighter.callbacks.writer.file import LighterFileWriter -import shutil - def test_file_writer_initialization(): """Test LighterFileWriter initialization with proper attributes.""" path = Path("test_dir") @@ -18,7 +17,6 @@ def test_file_writer_initialization(): finally: shutil.rmtree(path) # Clean up after test -import pytest def test_file_writer_write_tensor(): """Test LighterFileWriter's ability to write and persist tensors correctly.""" @@ -28,30 +26,13 @@ def test_file_writer_write_tensor(): writer = LighterFileWriter(path=test_dir, writer="tensor") tensor = torch.tensor([1, 2, 3]) writer.write(tensor, id=1) - + # Verify file exists saved_path = writer.path / "1.pt" assert saved_path.exists() - + # Verify tensor contents loaded_tensor = torch.load(saved_path) assert torch.equal(loaded_tensor, tensor) finally: shutil.rmtree(test_dir) - -def test_file_writer_write_tensor_errors(): - """Test error handling in LighterFileWriter.""" - test_dir = Path("test_dir") - test_dir.mkdir(exist_ok=True) - writer = LighterFileWriter(path=test_dir, writer="tensor") - - try: - # Test invalid tensor - with pytest.raises(ValueError): - writer.write("not a tensor", id=1) - - # Test invalid ID - with pytest.raises(ValueError): - writer.write(torch.tensor([1]), id=-1) - finally: - shutil.rmtree(test_dir) diff --git a/tests/unit/test_callbacks_writer_table.py b/tests/unit/test_callbacks_writer_table.py index f19c4a64..f2370855 100644 --- a/tests/unit/test_callbacks_writer_table.py +++ b/tests/unit/test_callbacks_writer_table.py @@ -1,48 +1,36 @@ -from unittest import mock from pathlib import Path +from unittest import mock + import pandas as pd import pytest import torch from pytorch_lightning import Trainer + from lighter.callbacks.writer.table import LighterTableWriter from lighter.system import LighterSystem + def custom_writer(tensor): return {"custom": tensor.sum().item()} + def test_table_writer_initialization(): writer = LighterTableWriter(path="test.csv", writer="tensor") assert writer.path == Path("test.csv") + def test_table_writer_custom_writer(): writer = LighterTableWriter(path="test.csv", writer=custom_writer) test_tensor = torch.tensor([1, 2, 3]) writer.write(tensor=test_tensor, id=1) assert writer.csv_records[0]["pred"] == {"custom": 6} -def test_table_writer_distributed_gather(tmp_path, monkeypatch): - writer = LighterTableWriter(path=tmp_path / "test.csv", writer="tensor") - trainer = Trainer(max_epochs=1) - - # Mock the distributed environment methods - monkeypatch.setattr(trainer, "world_size", lambda: 2) - monkeypatch.setattr(trainer, "is_global_zero", lambda: True) - writer.csv_records = [{"id": 1, "pred": [1, 2, 3]}] - writer.on_predict_epoch_end(trainer, mock.Mock()) - assert (tmp_path / "test.csv").exists() - -def test_table_writer_edge_cases(): - writer = LighterTableWriter(path="test.csv", writer="tensor") - writer.write(tensor=torch.tensor([]), id=2) # empty tensor - writer.write(tensor=torch.randn(1000), id=3) # large tensor - writer.write(tensor=torch.tensor([1.5, 2.5]), id=4) # float tensor - def test_table_writer_write(): """Test LighterTableWriter write functionality with various inputs.""" test_file = Path("test.csv") writer = LighterTableWriter(path="test.csv", writer="tensor") - + expected_records = [ {"id": 1, "pred": [1, 2, 3]}, {"id": "some_id", "pred": -1}, @@ -53,24 +41,69 @@ def test_table_writer_write(): assert len(writer.csv_records) == 1 assert writer.csv_records[0]["pred"] == expected_records[0]["pred"] assert writer.csv_records[0]["id"] == expected_records[0]["id"] - + # Test edge cases writer.write(tensor=torch.tensor(expected_records[1]["pred"]), id=expected_records[1]["id"]) writer.write(tensor=torch.tensor(expected_records[2]["pred"]), id=expected_records[2]["id"]) trainer = Trainer(max_epochs=1) writer.on_predict_epoch_end(trainer, mock.Mock()) - # Verify file creation and content - assert test_file.exists() - df = pd.read_csv(test_file) - df['id'] = df['id'].astype(str) - df['pred'] = df['pred'].apply(eval) + # Verify file creation and content + assert test_file.exists() + df = pd.read_csv(test_file) + df["id"] = df["id"].astype(str) + df["pred"] = df["pred"].apply(eval) - for record in expected_records: - row = df[df['id'] == str(record['id'])] - assert not row.empty - pred_value = row['pred'].iloc[0] # get the value from the Series - assert pred_value == record['pred'] + for record in expected_records: + row = df[df["id"] == str(record["id"])] + assert not row.empty + pred_value = row["pred"].iloc[0] # get the value from the Series + assert pred_value == record["pred"] # Cleanup test_file.unlink() + + +def test_table_writer_write_multi_process(tmp_path, monkeypatch): + test_file = tmp_path / "test.csv" + writer = LighterTableWriter(path=test_file, writer="tensor") + trainer = Trainer(max_epochs=1) + + # Expected records after gathering from all processes + rank0_records = [{"id": 1, "pred": [1, 2, 3]}] # records from rank 0 + rank1_records = [{"id": 2, "pred": [4, 5, 6]}] # records from rank 1 + expected_records = rank0_records + rank1_records + + # Mock distributed functions for multi-process simulation + def mock_gather(obj, gather_list, dst=0): + if gather_list is not None: + # Fill gather_list with records from each rank + gather_list[0] = rank0_records + gather_list[1] = rank1_records + + def mock_get_rank(): + return 0 + + monkeypatch.setattr(torch.distributed, "gather_object", mock_gather) + monkeypatch.setattr(torch.distributed, "get_rank", mock_get_rank) + monkeypatch.setattr(trainer.strategy, "world_size", 2) + + writer.on_predict_epoch_end(trainer, mock.Mock()) + + # Verify file creation + assert test_file.exists() + + # Verify file content + df = pd.read_csv(test_file) + df["id"] = df["id"].astype(str) + df["pred"] = df["pred"].apply(eval) + + # Check that all expected records are in the CSV + for record in expected_records: + row = df[df["id"] == str(record["id"])] + assert not row.empty + pred_value = row["pred"].iloc[0] + assert pred_value == record["pred"] + + # Verify total number of records + assert len(df) == len(expected_records) diff --git a/tests/unit/test_utils_collate.py b/tests/unit/test_utils_collate.py index 96283578..24caa95c 100644 --- a/tests/unit/test_utils_collate.py +++ b/tests/unit/test_utils_collate.py @@ -1,9 +1,11 @@ +import torch + from lighter.utils.collate import collate_replace_corrupted def test_collate_replace_corrupted(): """Test collate_replace_corrupted function handles corrupted data correctly. - + Tests: - Corrupted values (None) are replaced with valid dataset values - Non-corrupted values remain unchanged @@ -12,23 +14,23 @@ def test_collate_replace_corrupted(): """ batch = [1, None, 2, None, 3] dataset = [1, 2, 3, 4, 5] + # `collate_replace_corrupted` works by removing the corrupted values + # from the batch before adding additional samples to make up for them collated_batch = collate_replace_corrupted(batch, dataset) + # Test length assert len(collated_batch) == len(batch) - - # Test non-corrupted values remain unchanged - assert collated_batch[0] == 1 - assert collated_batch[2] == 2 - assert collated_batch[4] == 3 - + + # Test non-corrupted values remain unchanged. + filtered_batch = list(filter(lambda x: x is not None, batch)) + assert collated_batch[0].item() == filtered_batch[0] + assert collated_batch[1].item() == filtered_batch[1] + assert collated_batch[2].item() == filtered_batch[2] + # Test corrupted values are replaced with valid dataset values - assert collated_batch[1] in dataset - assert collated_batch[3] in dataset - - # Test edge cases - empty_batch = [] - assert len(collate_replace_corrupted(empty_batch, dataset)) == 0 - + assert collated_batch[3].item() in dataset + assert collated_batch[4].item() in dataset + all_corrupted = [None, None, None] collated_all_corrupted = collate_replace_corrupted(all_corrupted, dataset) assert len(collated_all_corrupted) == len(all_corrupted)