project-lighter · ibro45 · Nov 30, 2024 · Nov 28, 2024 · Nov 28, 2024 · Nov 28, 2024
diff --git a/.gitignore b/.gitignore
@@ -152,3 +152,5 @@ projects/*
 **/predictions/
 */.DS_Store
 .DS_Store
+.aider*
+test_dir/
diff --git a/lighter/callbacks/writer/table.py b/lighter/callbacks/writer/table.py
@@ -60,7 +60,11 @@ def on_predict_epoch_end(self, trainer: Trainer, pl_module: LighterSystem) -> No
         # Save the records to a CSV file
         if trainer.is_global_zero:
             df = pd.DataFrame(self.csv_records)
-            df = df.sort_values("id").set_index("id")
+            try:
+                df = df.sort_values("id")
+            except TypeError:
+                pass
+            df = df.set_index("id")
             df.to_csv(self.path)
 
         # Clear the records after saving

diff --git a/lighter/utils/runner.py b/lighter/utils/runner.py
@@ -36,7 +36,7 @@ def parse_config(**kwargs) -> ConfigParser:
         raise ValueError("'--config' not specified. Please provide a valid configuration file.")
 
     # Initialize the parser with the predefined structure.
-    parser = ConfigParser(ConfigSchema().dict(), globals=False)
+    parser = ConfigParser(ConfigSchema().model_dump(), globals=False)
     # Update the parser with the configuration file.
     parser.update(parser.load_config_files(kwargs.pop("config")))
     # Update the parser with the provided cli arguments.

diff --git a/tests/unit/test_callbacks_freezer.py b/tests/unit/test_callbacks_freezer.py
@@ -0,0 +1,65 @@
+import pytest
+import torch
+from pytorch_lightning import Trainer
+from torch.nn import Module
+from torch.utils.data import Dataset
+
+from lighter.callbacks.freezer import LighterFreezer
+from lighter.system import LighterSystem
+
+
+class DummyModel(Module):
+    """Small stack of three linear layers (10 -> 10 -> 4 -> 1) for the freezer tests."""
+
+    def __init__(self):
+        super().__init__()
+        self.layer1 = torch.nn.Linear(10, 10)
+        self.layer2 = torch.nn.Linear(10, 4)
+        self.layer3 = torch.nn.Linear(4, 1)
+
+    def forward(self, x):
+        # Feed the input through layer1, layer2 and layer3 in that order.
+        return self.layer3(self.layer2(self.layer1(x)))
+
+
+class DummyDataset(Dataset):
+    def __len__(self):  # fixed-size dataset of ten samples
+        return 10
+
+    def __getitem__(self, idx):  # ``idx`` is ignored; each call draws a fresh random input
+        return dict(input=torch.randn(10), target=torch.tensor(0))
+
+
+@pytest.fixture
+def dummy_system():
+    """Return a LighterSystem built from DummyModel, SGD, cross-entropy and a train DummyDataset."""
+    net = DummyModel()
+    sgd = torch.optim.SGD(net.parameters(), lr=0.01)
+    loss_fn, train_set = torch.nn.CrossEntropyLoss(), DummyDataset()
+    return LighterSystem(model=net, batch_size=32, criterion=loss_fn, optimizer=sgd, datasets={"train": train_set})
+
+
+def test_freezer_initialization():
+    """The ``names`` passed to the constructor are exposed unchanged on the callback."""
+    assert LighterFreezer(names=["layer1"]).names == ["layer1"]
+
+
+def test_freezer_functionality(dummy_system):
+    """After one epoch, parameters listed in ``names`` are frozen and layer2 stays trainable."""
+    callback = LighterFreezer(names=["layer1.weight", "layer1.bias"])
+    Trainer(callbacks=[callback], max_epochs=1).fit(dummy_system)
+    frozen_layer = dummy_system.model.layer1
+    assert not frozen_layer.weight.requires_grad and not frozen_layer.bias.requires_grad
+    assert dummy_system.model.layer2.weight.requires_grad
+
+
+def test_freezer_with_exceptions(dummy_system):
+    """Parameters prefixed with ``layer`` are frozen unless listed in ``except_names``."""
+    callback = LighterFreezer(name_starts_with=["layer"], except_names=["layer2.weight", "layer2.bias"])
+    Trainer(callbacks=[callback], max_epochs=1).fit(dummy_system)
+    net = dummy_system.model
+    # layer1 and layer3 match the prefix and are not exempted, so they must be frozen.
+    for frozen in (net.layer1, net.layer3):
+        assert not frozen.weight.requires_grad
+        assert not frozen.bias.requires_grad
+    assert net.layer2.weight.requires_grad and net.layer2.bias.requires_grad
diff --git a/tests/unit/test_callbacks_utils.py b/tests/unit/test_callbacks_utils.py
@@ -0,0 +1,19 @@
+import torch
+
+from lighter.callbacks.utils import preprocess_image
+
+
+def test_preprocess_image_2d():
+    """A (1, 3, 64, 64) batch of 2D images is expected to come back with shape (3, 64, 64)."""
+    batch = torch.rand(1, 3, 64, 64)
+    assert preprocess_image(batch).shape == (3, 64, 64)
+
+
+def test_preprocess_image_3d():
+    """Check the output shape for a batch of single-channel 3D images.
+
+    Expected shape: (1, depth * height, batch_size * width).
+    """
+    n, d, h, w = 8, 20, 64, 64
+    volumes = torch.rand(n, 1, d, h, w)
+    assert preprocess_image(volumes).shape == (1, d * h, n * w)
diff --git a/tests/unit/test_callbacks_writer_base.py b/tests/unit/test_callbacks_writer_base.py
@@ -0,0 +1,8 @@
+import pytest
+
+from lighter.callbacks.writer.base import LighterBaseWriter
+
+
+def test_writer_initialization():
+    with pytest.raises(TypeError):  # instantiating the base writer directly must fail
+        LighterBaseWriter(path="test", writer="tensor")
diff --git a/tests/unit/test_callbacks_writer_file.py b/tests/unit/test_callbacks_writer_file.py
@@ -0,0 +1,38 @@
+import shutil
+from pathlib import Path
+
+import torch
+
+from lighter.callbacks.writer.file import LighterFileWriter
+
+
+def test_file_writer_initialization(tmp_path):
+    """Test LighterFileWriter initialization with proper attributes.
+
+    Uses pytest's per-test ``tmp_path`` instead of creating and then
+    ``rmtree``-ing a relative ``test_dir``, which could delete unrelated
+    data in the working directory and collide with other tests.
+    """
+    writer = LighterFileWriter(path=tmp_path, writer="tensor")
+    assert writer.path == tmp_path
+    assert writer.writer.__name__ == "write_tensor"  # Verify writer function
+
+
+def test_file_writer_write_tensor(tmp_path):
+    """Test LighterFileWriter's ability to write and persist tensors correctly.
+
+    The output goes to pytest's per-test ``tmp_path`` rather than a shared
+    ``test_dir`` in the working directory, so the test cannot collide with
+    other tests or delete pre-existing data, and needs no manual cleanup.
+    """
+    writer = LighterFileWriter(path=tmp_path, writer="tensor")
+    tensor = torch.tensor([1, 2, 3])
+    writer.write(tensor, id=1)
+
+    # Verify file exists
+    saved_path = writer.path / "1.pt"
+    assert saved_path.exists()
+
+    # Verify tensor contents
+    loaded_tensor = torch.load(saved_path)
+    assert torch.equal(loaded_tensor, tensor)
diff --git a/tests/unit/test_callbacks_writer_table.py b/tests/unit/test_callbacks_writer_table.py
@@ -0,0 +1,109 @@
+from pathlib import Path
+from unittest import mock
+
+import pandas as pd
+import pytest
+import torch
+from pytorch_lightning import Trainer
+
+from lighter.callbacks.writer.table import LighterTableWriter
+from lighter.system import LighterSystem
+
+
+def custom_writer(tensor):  # stand-in writer: wraps the scalar sum of the tensor in a dict
+    return dict(custom=tensor.sum().item())
+
+
+def test_table_writer_initialization():
+    """A path given as a string is expected to compare equal to the matching ``Path``."""
+    assert LighterTableWriter(path="test.csv", writer="tensor").path == Path("test.csv")
+
+
+def test_table_writer_custom_writer():
+    """A callable passed as ``writer`` produces the value stored under ``pred``."""
+    table_writer = LighterTableWriter(path="test.csv", writer=custom_writer)
+    table_writer.write(tensor=torch.tensor([1, 2, 3]), id=1)
+    assert table_writer.csv_records[0]["pred"] == {"custom": 6}
+
+
+def test_table_writer_write(tmp_path):
+    """Test LighterTableWriter write functionality with various inputs."""
+    import ast  # local import: parse stored predictions as literals instead of using ``eval``
+
+    # Write into pytest's ``tmp_path`` so a failing assertion cannot leave ``test.csv`` behind.
+    test_file = tmp_path / "test.csv"
+    writer = LighterTableWriter(path=test_file, writer="tensor")
+
+    expected_records = [
+        {"id": 1, "pred": [1, 2, 3]},
+        {"id": "some_id", "pred": -1},
+        {"id": 1331, "pred": [1.5, 2.5]},
+    ]
+    # Test basic write
+    writer.write(tensor=torch.tensor(expected_records[0]["pred"]), id=expected_records[0]["id"])
+    assert len(writer.csv_records) == 1
+    assert writer.csv_records[0]["pred"] == expected_records[0]["pred"]
+    assert writer.csv_records[0]["id"] == expected_records[0]["id"]
+
+    # Test edge cases
+    writer.write(tensor=torch.tensor(expected_records[1]["pred"]), id=expected_records[1]["id"])
+    writer.write(tensor=torch.tensor(expected_records[2]["pred"]), id=expected_records[2]["id"])
+    trainer = Trainer(max_epochs=1)
+    writer.on_predict_epoch_end(trainer, mock.Mock())
+
+    # Verify file creation and content
+    assert test_file.exists()
+    df = pd.read_csv(test_file)
+    df["id"] = df["id"].astype(str)
+    df["pred"] = df["pred"].apply(ast.literal_eval)
+
+    for record in expected_records:
+        row = df[df["id"] == str(record["id"])]
+        assert not row.empty
+        pred_value = row["pred"].iloc[0]  # get the value from the Series
+        assert pred_value == record["pred"]
+
+
+def test_table_writer_write_multi_process(tmp_path, monkeypatch):
+    """Test that the written CSV holds the records gathered from two simulated ranks."""
+    import ast  # local import: parse stored predictions as literals instead of using ``eval``
+
+    test_file = tmp_path / "test.csv"
+    writer = LighterTableWriter(path=test_file, writer="tensor")
+    trainer = Trainer(max_epochs=1)
+
+    # Expected records after gathering from all processes
+    rank0_records = [{"id": 1, "pred": [1, 2, 3]}]  # records from rank 0
+    rank1_records = [{"id": 2, "pred": [4, 5, 6]}]  # records from rank 1
+    expected_records = rank0_records + rank1_records
+
+    # Mock distributed functions for multi-process simulation
+    def mock_gather(obj, gather_list, dst=0):
+        if gather_list is not None:
+            # Fill gather_list with records from each rank
+            gather_list[0] = rank0_records
+            gather_list[1] = rank1_records
+
+    monkeypatch.setattr(torch.distributed, "gather_object", mock_gather)
+    monkeypatch.setattr(torch.distributed, "get_rank", lambda: 0)  # report rank 0
+    monkeypatch.setattr(trainer.strategy, "world_size", 2)
+
+    writer.on_predict_epoch_end(trainer, mock.Mock())
+
+    # Verify file creation
+    assert test_file.exists()
+
+    # Verify file content
+    df = pd.read_csv(test_file)
+    df["id"] = df["id"].astype(str)
+    df["pred"] = df["pred"].apply(ast.literal_eval)
+
+    # Check that all expected records are in the CSV
+    for record in expected_records:
+        row = df[df["id"] == str(record["id"])]
+        assert not row.empty
+        pred_value = row["pred"].iloc[0]
+        assert pred_value == record["pred"]
+
+    # Verify total number of records
+    assert len(df) == len(expected_records)