Commit b53cf5e

more fixing

Borda committed Sep 17, 2024
1 parent 46284e3

Showing 22 changed files with 117 additions and 169 deletions.
7 changes: 0 additions & 7 deletions .pre-commit-config.yaml
@@ -46,13 +46,6 @@ repos:
additional_dependencies: [tomli]
#args: ["--write-changes"] # uncomment if you want to get automatic fixing

- repo: https://github.com/PyCQA/docformatter
rev: v1.7.5
hooks:
- id: docformatter
additional_dependencies: [tomli]
args: ["--in-place"]

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.3.2
hooks:
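With the docformatter hook removed, docstring formatting is left to ruff's pydocstyle checks (Google convention, configured in pyproject.toml below). A minimal sketch of the docstring shape those D-rules accept — the function and its text are invented for illustration:

    def load_encoder(path: str):
        """Load a fitted encoder from disk.

        Args:
            path: location of the serialized encoder file.

        Returns:
            The deserialized encoder object.
        """
        ...
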
5 changes: 1 addition & 4 deletions examples/multi_modal/create_labelencoder.py
@@ -3,10 +3,7 @@


def create_labelencoder():
"""Create a label encoder
Returns:
"""
"""Create a label encoder."""
data = ["Cancelation", "IBAN Change", "Damage Report"]
# Create an instance of LabelEncoder
label_encoder = LabelEncoder()
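For context, a minimal sketch of what this helper produces with scikit-learn's LabelEncoder; the three class names come from the snippet above, while the transform calls are illustrative:

    from sklearn.preprocessing import LabelEncoder

    data = ["Cancelation", "IBAN Change", "Damage Report"]
    label_encoder = LabelEncoder()
    label_encoder.fit(data)

    # classes_ is stored sorted: ['Cancelation', 'Damage Report', 'IBAN Change']
    encoded = label_encoder.transform(["Damage Report"])  # array([1])
    restored = label_encoder.inverse_transform(encoded)   # array(['Damage Report'])
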
31 changes: 15 additions & 16 deletions examples/multi_modal/dataloader.py
@@ -29,14 +29,12 @@ def __init__(self):
self.hyperparameters = HYPERPARAMETERS

def load_labelencoder(self):
"""Function to load the label encoder from s3
Returns:
"""
"""Function to load the label encoder from s3."""
return joblib.load(self.hyperparameters["label_encoder_name"])

def load_tokenizer(self):
"""Load the tokenizer files and the pre training model path from s3 spezified in the hyperparameters
Returns: tokenizer
"""Load the tokenizer files and the pre-training model path from s3 spezified in the hyperparameters
Returns: tokenizer.
"""
# Load Bert tokenizer
return BertTokenizerFast.from_pretrained("bert-base-cased")
@@ -60,12 +58,10 @@ def __init__(self, input_dir: Union[str, Any], hyperparameters: Union[dict, Any]
self.labelencoder = EC.load_labelencoder()

def tokenize_data(self, tokenizer, texts, max_length: int):
"""Tokenize the text
Args:
tokenizer:
texts:
max_length:
Returns: input_ids, attention_masks
"""Tokenize the text.
Returns: input_ids, attention_masks.
"""
encoded_text = tokenizer(
texts,
@@ -98,7 +94,7 @@ def __init__(self, hyperparameters: dict):
"""Init if the Data Module
Args:
data_path: dataframe with the data
hyperparameters: Hyperparameters
hyperparameters: Hyperparameters.
"""
super().__init__()
self.hyperparameters = hyperparameters
@@ -126,9 +122,12 @@ def __init__(self, hyperparameters: dict):
)

def train_dataloader(self) -> DataLoader:
"""Define the training dataloader
"""Define the training dataloader.
Returns:
training dataloader
-------
training dataloader.
"""
dataset_train = DocumentClassificationDataset(
hyperparameters=self.hyperparameters,
@@ -147,7 +146,7 @@ def train_dataloader(self) -> DataLoader:
def val_dataloader(self) -> DataLoader:
"""Define the validation dataloader
Returns:
validation dataloader
validation dataloader.
"""
dataset_val = DocumentClassificationDataset(
hyperparameters=self.hyperparameters,
Expand All @@ -165,7 +164,7 @@ def val_dataloader(self) -> DataLoader:
def test_dataloader(self) -> DataLoader:
"""Define the test dataloader
Returns:
test dataloader
test dataloader.
"""
dataset_test = DocumentClassificationDataset(
hyperparameters=self.hyperparameters,
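The tokenize_data hunk above is truncated before the tokenizer's keyword arguments, so the call below is a hedged reconstruction of typical BertTokenizerFast usage; only the checkpoint name and the returned keys (input_ids, attention_masks) are confirmed by the diff, the remaining arguments are assumptions:

    from transformers import BertTokenizerFast

    tokenizer = BertTokenizerFast.from_pretrained("bert-base-cased")
    encoded_text = tokenizer(
        ["Damage on the rear bumper"],  # texts: illustrative sample
        padding="max_length",           # assumed padding strategy
        truncation=True,
        max_length=128,                 # assumed value
        return_tensors="pt",
    )
    input_ids = encoded_text["input_ids"]             # shape [1, 128]
    attention_masks = encoded_text["attention_mask"]  # 1 = real token, 0 = padding
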
99 changes: 28 additions & 71 deletions examples/multi_modal/loop.py
@@ -78,7 +78,6 @@ def save_reports(self, model_dir, mode, report_confusion_matrix, report):
mode: train, test or val
report_confusion_matrix: sklearn confusion matrix
report: sklearn classification report
Returns:
"""
df_cm = pd.DataFrame(report_confusion_matrix)
@@ -88,16 +87,7 @@ def save_reports(self, model_dir, mode, report_confusion_matrix, report):
logger.info("Confusion Matrix and Classication report are saved.")

def save_test_evaluations(self, model_dir, mode, y_pred, y_true, confis, numerical_id_):
"""Save a pandas dataframe with prediction and ground truth and identifier (numerical id) of the test dataset
Args:
model_dir:
mode:
y_pred:
y_true:
confis:
numerical_id_:
Returns:
"""
"""Save pandas dataframe with prediction and ground truth and identifier (numerical id) of the test dataset."""
df_test = pd.DataFrame()
df_test["pred"] = y_pred
df_test["confidence"] = confis.max(axis=1)
@@ -152,41 +142,36 @@ def forward(
Used for train, test and val.
Args:
----
y: tensor with text data as tokens
Returns:
computational graph
"""
return self.module(x, y, z)

def training_step(self, batch: Dict[str, torch.Tensor]) -> Dict:
"""Call the eval share for training
Args:
batch: tensor
"""Call the eval share for training.
Returns:
dict with loss, outputs and ground_truth
dict with loss, outputs and ground_truth.
"""
return self._shared_eval_step(batch, "train")

def validation_step(self, batch: Dict[str, torch.Tensor], batch_idx: int) -> Dict:
"""Call the eval share for validation
Args:
batch:
batch_idx:
"""Call the eval share for validation.
Returns:
dict with loss, outputs and ground_truth
dict with loss, outputs and ground_truth.
"""
return self._shared_eval_step(batch, "val")

def test_step(self, batch: Dict[str, torch.Tensor], batch_idx: int) -> Dict:
"""Call the eval share for test
Args:
batch:
batch_idx:
"""Call the eval share for test.
Returns:
dict with loss, outputs and ground_truth
dict with loss, outputs and ground_truth.
"""
ret = self._shared_eval_step(batch, "test")
self.pred_list.append(ret)
@@ -199,7 +184,9 @@ def _shared_eval_step(self, batch: Dict[str, torch.Tensor], mode: str) -> Dict:
----
batch: tensor
mode: train, test or val
Returns:
-------
dict with loss, outputs and ground_truth
"""
@@ -227,13 +214,8 @@ def _shared_eval_step(self, batch: Dict[str, torch.Tensor], mode: str) -> Dict:

return {"outputs": out, "loss": loss, "ground_truth": ground_truth, "numerical_id": numerical_id}

def _epoch_end(self, mode: str):
"""Calculate loss and metricies at end of epoch
Args:
mode:
Returns:
None
"""
def _epoch_end(self, mode: str) -> None:
"""Calculate loss and metrics at end of epoch."""
if mode == "val":
output = self.val_metrics.compute()
self.log_dict(output)
@@ -248,15 +230,7 @@ def _epoch_end(self, mode: str):
self.test_metrics.reset()

def predict(self, batch: Dict[str, torch.Tensor], batch_idx: int = 0, dataloader_idx: int = 0) -> torch.Tensor:
"""Model prediction without softmax and argmax to predict class label.
Args:
----
outputs:
Returns:
None
"""
"""Model prediction without softmax and argmax to predict class label."""
self.eval()
with torch.no_grad():
ids = batch["ID"]
@@ -265,48 +239,31 @@ def predict(self, batch: Dict[str, torch.Tensor], batch_idx: int = 0, dataloader_idx: int = 0) -> torch.Tensor:
return self.forward(ids, atts, img)

def on_test_epoch_end(self) -> None:
"""Calculate the metrics at the end of epoch for test step
Args:
outputs:
Returns:
None
"""
"""Calculate the metrics at the end of epoch for test step."""
self._epoch_end("test")

def on_validation_epoch_end(self):
"""Calculate the metrics at the end of epoch for val step
Args:
outputs:
Returns:
None
"""
def on_validation_epoch_end(self) -> None:
"""Calculate the metrics at the end of epoch for val step."""
self._epoch_end("val")

def on_train_epoch_end(self):
"""Calculate the metrics at the end of epoch for train step
Args:
outputs:
Returns:
None
"""
def on_train_epoch_end(self) -> None:
"""Calculate the metrics at the end of epoch for train step."""
self._epoch_end("train")

def configure_optimizers(self) -> Any:
"""Configure the optimizer
"""Configure the optimizer.
Returns:
-------
optimizer
"""
optimizer = AdamW(self.parameters(), lr=self.learning_rate, weight_decay=self.hyperparameters["weight_decay"])
scheduler = StepLR(optimizer, step_size=1, gamma=0.1)
return [optimizer], [{"scheduler": scheduler, "interval": "epoch"}]

def configure_callbacks(self) -> Union[Sequence[pl.pytorch.Callback], pl.pytorch.Callback]:
"""Configure Early stopping or Model Checkpointing.
Returns
-------
"""
"""Configure Early stopping or Model Checkpointing."""
early_stop = EarlyStopping(
monitor="val_MulticlassAccuracy", patience=self.hyperparameters["patience"], mode="max"
)
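The _epoch_end hunk calls compute()/reset() on metric collections whose construction is not part of this diff, so the setup below is an assumption; the compute-log-reset sequence and the val_MulticlassAccuracy key (the one monitored by the EarlyStopping callback above) follow the shown code:

    import torch
    import torchmetrics

    # assumed setup; the diff only shows the collections being consumed
    val_metrics = torchmetrics.MetricCollection(
        [torchmetrics.Accuracy(task="multiclass", num_classes=3)]
    ).clone(prefix="val_")

    val_metrics.update(torch.tensor([[0.8, 0.1, 0.1], [0.2, 0.7, 0.1]]),
                       torch.tensor([0, 1]))
    output = val_metrics.compute()  # {'val_MulticlassAccuracy': tensor(1.)}
    # in the LightningModule this dict is passed to self.log_dict(output)
    val_metrics.reset()             # clear state before the next epoch
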
15 changes: 6 additions & 9 deletions examples/multi_modal/model_arc.py
@@ -42,10 +42,6 @@ def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor):
Used for train, test and val.
Args:
----
input_ids
attention_mask
Returns:
computational graph
@@ -74,8 +70,9 @@ def __init__(self, endpoint_mode: bool, hyperparameters: dict):
self.dropout = nn.Dropout(self.hyperparameters["dropout"])

def get_bert_model(self):
"""Load the pre trained bert model weigths
Returns: model
"""Load the pre-trained bert model weigths.
Returns: model.
"""
model = BertModel.from_pretrained("bert-base-cased")
return BertClassifier(model)
@@ -91,9 +88,9 @@ def forward(
Args:
----
x (torch.Tensor): Tensor with id tokens
y (torch.Tensor): Tensor with attention tokens.
z (torch.Tensor): Tensor with image.
x: Tensor with id tokens
y: Tensor with attention tokens.
z: Tensor with image.
Returns:
-------
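get_bert_model wraps a stock Hugging Face checkpoint; since the BertClassifier head is not shown in this hunk, the pooling step below is an assumption rather than the module's actual forward pass:

    import torch
    from transformers import BertModel, BertTokenizerFast

    model = BertModel.from_pretrained("bert-base-cased")
    tokenizer = BertTokenizerFast.from_pretrained("bert-base-cased")

    batch = tokenizer(["IBAN change requested"], return_tensors="pt")
    with torch.no_grad():
        out = model(input_ids=batch["input_ids"], attention_mask=batch["attention_mask"])
    pooled = out.pooler_output  # [1, 768]; a common input to a classification head
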
39 changes: 26 additions & 13 deletions pyproject.toml
@@ -49,6 +49,12 @@ ignore-words-list = "te, compiletime"
[tool.ruff]
line-length = 120
target-version = "py38"
# Exclude a variety of commonly ignored directories.
exclude = [
".git",
"docs",
"src/litdata/utilities/_pytree.py",
]
# Enable Pyflakes `E` and `F` codes by default.
lint.select = [
"E", "W", # see: https://pypi.org/project/pycodestyle
@@ -65,40 +71,47 @@ lint.extend-select = [
"RET", # see: https://pypi.org/project/flake8-return
"PT", # see: https://pypi.org/project/flake8-pytest-style
"NPY201", # see: https://docs.astral.sh/ruff/rules/numpy2-deprecation
"RUF100" # yesqa
"RUF100", # yesqa
]
lint.ignore = [
"E731", # Do not assign a lambda expression, use a def
"S101", # todo: Use of `assert` detected
]
# Exclude a variety of commonly ignored directories.
exclude = [
".git",
"docs",
"src/litdata/utilities/_pytree.py",
]
lint.ignore-init-module-imports = true
# Unlike Flake8, default to a complexity level of 10.
lint.mccabe.max-complexity = 10
# Use Google-style docstrings.
lint.pydocstyle.convention = "google"

[tool.ruff.lint.per-file-ignores]
".actions/*" = ["S101", "S310"]
"setup.py" = ["S101", "SIM115"]
"setup.py" = ["D100", "SIM115"]
"examples/**" = [
"D100", "D101", "D102", "D103", "D104", "D105", "D107", # Missing docstring in public module, class, method, function, package
"D205", # todo: 1 blank line required between summary line and description
"D401", "D404", # First line should be in imperative mood; try rephrasing
"S311", # Standard pseudo-random generators are not suitable for cryptographic purposes
]
"src/**" = [
"D100", # Missing docstring in public module
"D101", # todo: Missing docstring in public class
"D102", # todo: Missing docstring in public method
"D103", # todo: Missing docstring in public function
"D104", # Missing docstring in public package
"D105", # todo: Missing docstring in magic method
"D107", # todo: Missing docstring in __init__
"D205", # todo: 1 blank line required between summary line and description
"D401", "D404", # todo: First line should be in imperative mood; try rephrasing
"S602", # todo: `subprocess` call with `shell=True` identified, security issue
"S605", # todo: Starting a process with a shell: seems safe, but may be changed in the future; consider rewriting without `shell`
"S607", # todo: Starting a process with a partial executable path
"S310", # todo: Audit URL open for permitted schemes. Allowing use of `file:` or custom schemes is often unexpected.
]
"tests/**" = [
"D100", "D101", "D102", "D103", "D104", "D105", "D107", # Missing docstring in public module, class, method, function, package
"D401", "D404", # First line should be in imperative mood; try rephrasing
"S105", "S106", # todo: Possible hardcoded password: ...
"D100", "D101", "D102", "D103", "D104", "D105",
]

[tool.ruff.lint.mccabe]
# Unlike Flake8, default to a complexity level of 10.
max-complexity = 10


[tool.mypy]
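Of the rules configured above, E731 remains ignored; for readers unfamiliar with it, a short sketch of what the rule flags and the form it prefers (the names are invented):

    # E731 ("do not assign a lambda expression, use a def") would flag this:
    normalize = lambda s: s.strip().lower()

    # preferred equivalent: same behavior, plus a proper __name__ in tracebacks
    def normalize_text(s: str) -> str:
        return s.strip().lower()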