Commit b53cf5e

more fixing

Borda committed Sep 17, 2024
1 parent 46284e3

Showing 22 changed files with 117 additions and 169 deletions.
7 changes: 0 additions & 7 deletions .pre-commit-config.yaml
@@ -46,13 +46,6 @@ repos:
additional_dependencies: [tomli]
#args: ["--write-changes"] # uncomment if you want to get automatic fixing

- repo: https://github.com/PyCQA/docformatter
rev: v1.7.5
hooks:
- id: docformatter
additional_dependencies: [tomli]
args: ["--in-place"]

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.3.2
hooks:
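With the docformatter hook removed, docstring formatting is left to ruff's pydocstyle checks (Google convention, configured in pyproject.toml below). A minimal sketch of the docstring shape those D-rules accept — the function and its text are invented for illustration:

    def load_encoder(path: str):
        """Load a fitted encoder from disk.

        Args:
            path: location of the serialized encoder file.

        Returns:
            The deserialized encoder object.
        """
        ...
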
5 changes: 1 addition & 4 deletions examples/multi_modal/create_labelencoder.py
@@ -3,10 +3,7 @@


def create_labelencoder():
"""Create a label encoder
Returns:
"""
"""Create a label encoder."""
data = ["Cancelation", "IBAN Change", "Damage Report"]
# Create an instance of LabelEncoder
label_encoder = LabelEncoder()
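For context, a minimal sketch of what this helper produces with scikit-learn's LabelEncoder; the three class names come from the snippet above, while the transform calls are illustrative:

    from sklearn.preprocessing import LabelEncoder

    data = ["Cancelation", "IBAN Change", "Damage Report"]
    label_encoder = LabelEncoder()
    label_encoder.fit(data)

    # classes_ is stored sorted: ['Cancelation', 'Damage Report', 'IBAN Change']
    encoded = label_encoder.transform(["Damage Report"])  # array([1])
    restored = label_encoder.inverse_transform(encoded)   # array(['Damage Report'])
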
31 changes: 15 additions & 16 deletions examples/multi_modal/dataloader.py
@@ -29,14 +29,12 @@ def __init__(self):
self.hyperparameters = HYPERPARAMETERS

def load_labelencoder(self):
"""Function to load the label encoder from s3
Returns:
"""
"""Function to load the label encoder from s3."""
return joblib.load(self.hyperparameters["label_encoder_name"])

def load_tokenizer(self):
"""Load the tokenizer files and the pre training model path from s3 spezified in the hyperparameters
Returns: tokenizer
"""Load the tokenizer files and the pre-training model path from s3 spezified in the hyperparameters
Returns: tokenizer.
"""
# Load Bert tokenizer
return BertTokenizerFast.from_pretrained("bert-base-cased")
@@ -60,12 +58,10 @@ def __init__(self, input_dir: Union[str, Any], hyperparameters: Union[dict, Any]
self.labelencoder = EC.load_labelencoder()

def tokenize_data(self, tokenizer, texts, max_length: int):
"""Tokenize the text
Args:
tokenizer:
texts:
max_length:
Returns: input_ids, attention_masks
"""Tokenize the text.
Returns: input_ids, attention_masks.
"""
encoded_text = tokenizer(
texts,
@@ -98,7 +94,7 @@ def __init__(self, hyperparameters: dict):
"""Init if the Data Module
Args:
data_path: dataframe with the data
hyperparameters: Hyperparameters
hyperparameters: Hyperparameters.
"""
super().__init__()
self.hyperparameters = hyperparameters
@@ -126,9 +122,12 @@ def __init__(self, hyperparameters: dict):
)

def train_dataloader(self) -> DataLoader:
"""Define the training dataloader
"""Define the training dataloader.
Returns:
training dataloader
-------
training dataloader.
"""
dataset_train = DocumentClassificationDataset(
hyperparameters=self.hyperparameters,
@@ -147,7 +146,7 @@ def train_dataloader(self) -> DataLoader:
def val_dataloader(self) -> DataLoader:
"""Define the validation dataloader
Returns:
validation dataloader
validation dataloader.
"""
dataset_val = DocumentClassificationDataset(
hyperparameters=self.hyperparameters,
Expand All @@ -165,7 +164,7 @@ def val_dataloader(self) -> DataLoader:
def test_dataloader(self) -> DataLoader:
"""Define the test dataloader
Returns:
test dataloader
test dataloader.
"""
dataset_test = DocumentClassificationDataset(
hyperparameters=self.hyperparameters,
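The tokenize_data hunk above is truncated before the tokenizer's keyword arguments, so the call below is a hedged reconstruction of typical BertTokenizerFast usage; only the checkpoint name and the returned keys (input_ids, attention_masks) are confirmed by the diff, the remaining arguments are assumptions:

    from transformers import BertTokenizerFast

    tokenizer = BertTokenizerFast.from_pretrained("bert-base-cased")
    encoded_text = tokenizer(
        ["Damage on the rear bumper"],  # texts: illustrative sample
        padding="max_length",           # assumed padding strategy
        truncation=True,
        max_length=128,                 # assumed value
        return_tensors="pt",
    )
    input_ids = encoded_text["input_ids"]             # shape [1, 128]
    attention_masks = encoded_text["attention_mask"]  # 1 = real token, 0 = padding
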
99 changes: 28 additions & 71 deletions examples/multi_modal/loop.py
@@ -78,7 +78,6 @@ def save_reports(self, model_dir, mode, report_confusion_matrix, report):
mode: train, test or val
report_confusion_matrix: sklearn confusion matrix
report: sklearn classification report
Returns:
"""
df_cm = pd.DataFrame(report_confusion_matrix)
@@ -88,16 +87,7 @@ def save_reports(self, model_dir, mode, report_confusion_matrix, report):
logger.info("Confusion Matrix and Classication report are saved.")

def save_test_evaluations(self, model_dir, mode, y_pred, y_true, confis, numerical_id_):
"""Save a pandas dataframe with prediction and ground truth and identifier (numerical id) of the test dataset
Args:
model_dir:
mode:
y_pred:
y_true:
confis:
numerical_id_:
Returns:
"""
"""Save pandas dataframe with prediction and ground truth and identifier (numerical id) of the test dataset."""
df_test = pd.DataFrame()
df_test["pred"] = y_pred
df_test["confidence"] = confis.max(axis=1)
@@ -152,41 +142,36 @@ def forward(
Used for train, test and val.
Args:
----
y: tensor with text data as tokens
Returns:
computational graph
"""
return self.module(x, y, z)

def training_step(self, batch: Dict[str, torch.Tensor]) -> Dict:
"""Call the eval share for training
Args:
batch: tensor
"""Call the eval share for training.
Returns:
dict with loss, outputs and ground_truth
dict with loss, outputs and ground_truth.
"""
return self._shared_eval_step(batch, "train")

def validation_step(self, batch: Dict[str, torch.Tensor], batch_idx: int) -> Dict:
"""Call the eval share for validation
Args:
batch:
batch_idx:
"""Call the eval share for validation.
Returns:
dict with loss, outputs and ground_truth
dict with loss, outputs and ground_truth.
"""
return self._shared_eval_step(batch, "val")

def test_step(self, batch: Dict[str, torch.Tensor], batch_idx: int) -> Dict:
"""Call the eval share for test
Args:
batch:
batch_idx:
"""Call the eval share for test.
Returns:
dict with loss, outputs and ground_truth
dict with loss, outputs and ground_truth.
"""
ret = self._shared_eval_step(batch, "test")
self.pred_list.append(ret)
@@ -199,7 +184,9 @@ def _shared_eval_step(self, batch: Dict[str, torch.Tensor], mode: str) -> Dict:
----
batch: tensor
mode: train, test or val
Returns:
-------
dict with loss, outputs and ground_truth
"""
@@ -227,13 +214,8 @@ def _shared_eval_step(self, batch: Dict[str, torch.Tensor], mode: str) -> Dict:

return {"outputs": out, "loss": loss, "ground_truth": ground_truth, "numerical_id": numerical_id}

def _epoch_end(self, mode: str):
"""Calculate loss and metricies at end of epoch
Args:
mode:
Returns:
None
"""
def _epoch_end(self, mode: str) -> None:
"""Calculate loss and metrics at end of epoch."""
if mode == "val":
output = self.val_metrics.compute()
self.log_dict(output)
@@ -248,15 +230,7 @@ def _epoch_end(self, mode: str):
self.test_metrics.reset()

def predict(self, batch: Dict[str, torch.Tensor], batch_idx: int = 0, dataloader_idx: int = 0) -> torch.Tensor:
"""Model prediction without softmax and argmax to predict class label.
Args:
----
outputs:
Returns:
None
"""
"""Model prediction without softmax and argmax to predict class label."""
self.eval()
with torch.no_grad():
ids = batch["ID"]
@@ -265,48 +239,31 @@ def predict(self, batch: Dict[str, torch.Tensor], batch_idx: int = 0, dataloader_idx: int = 0) -> torch.Tensor:
return self.forward(ids, atts, img)

def on_test_epoch_end(self) -> None:
"""Calculate the metrics at the end of epoch for test step
Args:
outputs:
Returns:
None
"""
"""Calculate the metrics at the end of epoch for test step."""
self._epoch_end("test")

def on_validation_epoch_end(self):
"""Calculate the metrics at the end of epoch for val step
Args:
outputs:
Returns:
None
"""
def on_validation_epoch_end(self) -> None:
"""Calculate the metrics at the end of epoch for val step."""
self._epoch_end("val")

def on_train_epoch_end(self):
"""Calculate the metrics at the end of epoch for train step
Args:
outputs:
Returns:
None
"""
def on_train_epoch_end(self) -> None:
"""Calculate the metrics at the end of epoch for train step."""
self._epoch_end("train")

def configure_optimizers(self) -> Any:
"""Configure the optimizer
"""Configure the optimizer.
Returns:
-------
optimizer
"""
optimizer = AdamW(self.parameters(), lr=self.learning_rate, weight_decay=self.hyperparameters["weight_decay"])
scheduler = StepLR(optimizer, step_size=1, gamma=0.1)
return [optimizer], [{"scheduler": scheduler, "interval": "epoch"}]

def configure_callbacks(self) -> Union[Sequence[pl.pytorch.Callback], pl.pytorch.Callback]:
"""Configure Early stopping or Model Checkpointing.
Returns
-------
"""
"""Configure Early stopping or Model Checkpointing."""
early_stop = EarlyStopping(
monitor="val_MulticlassAccuracy", patience=self.hyperparameters["patience"], mode="max"
)
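The _epoch_end hunk calls compute()/reset() on metric collections whose construction is not part of this diff, so the setup below is an assumption; the compute-log-reset sequence and the val_MulticlassAccuracy key (the one monitored by the EarlyStopping callback above) follow the shown code:

    import torch
    import torchmetrics

    # assumed setup; the diff only shows the collections being consumed
    val_metrics = torchmetrics.MetricCollection(
        [torchmetrics.Accuracy(task="multiclass", num_classes=3)]
    ).clone(prefix="val_")

    val_metrics.update(torch.tensor([[0.8, 0.1, 0.1], [0.2, 0.7, 0.1]]),
                       torch.tensor([0, 1]))
    output = val_metrics.compute()  # {'val_MulticlassAccuracy': tensor(1.)}
    # in the LightningModule this dict is passed to self.log_dict(output)
    val_metrics.reset()             # clear state before the next epoch
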
15 changes: 6 additions & 9 deletions examples/multi_modal/model_arc.py
@@ -42,10 +42,6 @@ def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor):
Used for train, test and val.
Args:
----
input_ids
attention_mask
Returns:
computational graph
@@ -74,8 +70,9 @@ def __init__(self, endpoint_mode: bool, hyperparameters: dict):
self.dropout = nn.Dropout(self.hyperparameters["dropout"])

def get_bert_model(self):
"""Load the pre trained bert model weigths
Returns: model
"""Load the pre-trained bert model weigths.
Returns: model.
"""
model = BertModel.from_pretrained("bert-base-cased")
return BertClassifier(model)
@@ -91,9 +88,9 @@ def forward(
Args:
----
x (torch.Tensor): Tensor with id tokens
y (torch.Tensor): Tensor with attention tokens.
z (torch.Tensor): Tensor with image.
x: Tensor with id tokens
y: Tensor with attention tokens.
z: Tensor with image.
Returns:
-------
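get_bert_model wraps a stock Hugging Face checkpoint; since the BertClassifier head is not shown in this hunk, the pooling step below is an assumption rather than the module's actual forward pass:

    import torch
    from transformers import BertModel, BertTokenizerFast

    model = BertModel.from_pretrained("bert-base-cased")
    tokenizer = BertTokenizerFast.from_pretrained("bert-base-cased")

    batch = tokenizer(["IBAN change requested"], return_tensors="pt")
    with torch.no_grad():
        out = model(input_ids=batch["input_ids"], attention_mask=batch["attention_mask"])
    pooled = out.pooler_output  # [1, 768]; a common input to a classification head
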
39 changes: 26 additions & 13 deletions pyproject.toml
@@ -49,6 +49,12 @@ ignore-words-list = "te, compiletime"
[tool.ruff]
line-length = 120
target-version = "py38"
# Exclude a variety of commonly ignored directories.
exclude = [
".git",
"docs",
"src/litdata/utilities/_pytree.py",
]
# Enable Pyflakes `E` and `F` codes by default.
lint.select = [
"E", "W", # see: https://pypi.org/project/pycodestyle
@@ -65,40 +71,47 @@ lint.extend-select = [
"RET", # see: https://pypi.org/project/flake8-return
"PT", # see: https://pypi.org/project/flake8-pytest-style
"NPY201", # see: https://docs.astral.sh/ruff/rules/numpy2-deprecation
"RUF100" # yesqa
"RUF100", # yesqa
]
lint.ignore = [
"E731", # Do not assign a lambda expression, use a def
"S101", # todo: Use of `assert` detected
]
# Exclude a variety of commonly ignored directories.
exclude = [
".git",
"docs",
"src/litdata/utilities/_pytree.py",
]
lint.ignore-init-module-imports = true
# Unlike Flake8, default to a complexity level of 10.
lint.mccabe.max-complexity = 10
# Use Google-style docstrings.
lint.pydocstyle.convention = "google"

[tool.ruff.lint.per-file-ignores]
".actions/*" = ["S101", "S310"]
"setup.py" = ["S101", "SIM115"]
"setup.py" = ["D100", "SIM115"]
"examples/**" = [
"D100", "D101", "D102", "D103", "D104", "D105", "D107", # Missing docstring in public module, class, method, function, package
"D205", # todo: 1 blank line required between summary line and description
"D401", "D404", # First line should be in imperative mood; try rephrasing
"S311", # Standard pseudo-random generators are not suitable for cryptographic purposes
]
"src/**" = [
"D100", # Missing docstring in public module
"D101", # todo: Missing docstring in public class
"D102", # todo: Missing docstring in public method
"D103", # todo: Missing docstring in public function
"D104", # Missing docstring in public package
"D105", # todo: Missing docstring in magic method
"D107", # todo: Missing docstring in __init__
"D205", # todo: 1 blank line required between summary line and description
"D401", "D404", # todo: First line should be in imperative mood; try rephrasing
"S602", # todo: `subprocess` call with `shell=True` identified, security issue
"S605", # todo: Starting a process with a shell: seems safe, but may be changed in the future; consider rewriting without `shell`
"S607", # todo: Starting a process with a partial executable path
"S310", # todo: Audit URL open for permitted schemes. Allowing use of `file:` or custom schemes is often unexpected.
]
"tests/**" = [
"D100", "D101", "D102", "D103", "D104", "D105", "D107", # Missing docstring in public module, class, method, function, package
"D401", "D404", # First line should be in imperative mood; try rephrasing
"S105", "S106", # todo: Possible hardcoded password: ...
"D100", "D101", "D102", "D103", "D104", "D105",
]

[tool.ruff.lint.mccabe]
# Unlike Flake8, default to a complexity level of 10.
max-complexity = 10


[tool.mypy]
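Of the rules configured above, E731 remains ignored; for readers unfamiliar with it, a short sketch of what the rule flags and the form it prefers (the names are invented):

    # E731 ("do not assign a lambda expression, use a def") would flag this:
    normalize = lambda s: s.strip().lower()

    # preferred equivalent: same behavior, plus a proper __name__ in tracebacks
    def normalize_text(s: str) -> str:
        return s.strip().lower()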