
fixing docstrings #374

Merged · 7 commits · Sep 17, 2024
7 changes: 0 additions & 7 deletions .pre-commit-config.yaml
@@ -46,13 +46,6 @@ repos:
additional_dependencies: [tomli]
#args: ["--write-changes"] # uncomment if you want to get automatic fixing

- repo: https://github.com/PyCQA/docformatter
rev: v1.7.5
hooks:
- id: docformatter
additional_dependencies: [tomli]
args: ["--in-place"]

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.3.2
hooks:
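With docformatter dropped from the pre-commit hooks, the docstring style is applied by hand in the commits below: a one-line summary ending in a period, a blank line, then Google-style sections. A minimal illustration of that target style (the function is hypothetical, not part of the diff):

def scale_feature(values: list, factor: float = 1.0) -> list:
    """Scale each value by a constant factor.

    Args:
        values: Raw feature values.
        factor: Multiplier applied to every value.

    Returns:
        The scaled values, in the original order.
    """
    return [v * factor for v in values]
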
6 changes: 1 addition & 5 deletions examples/multi_modal/create_labelencoder.py
@@ -3,11 +3,7 @@


def create_labelencoder():
"""
Create a label encoder
Returns:

"""
"""Create a label encoder."""
data = ["Cancelation", "IBAN Change", "Damage Report"]
# Create an instance of LabelEncoder
label_encoder = LabelEncoder()
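A sketch of how the full helper might look once the encoder is fitted and persisted, assuming joblib is used for serialization and the file name comes from the label_encoder_name hyperparameter (as load_labelencoder in dataloader.py suggests); the path argument is an illustration, not the file's actual signature:

import joblib
from sklearn.preprocessing import LabelEncoder


def create_labelencoder(path: str = "label_encoder.joblib") -> LabelEncoder:
    """Create, fit and persist a label encoder for the three document classes."""
    data = ["Cancelation", "IBAN Change", "Damage Report"]
    label_encoder = LabelEncoder()
    label_encoder.fit(data)           # maps each class name to an integer id
    joblib.dump(label_encoder, path)  # later reloaded via joblib.load(...)
    return label_encoder
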
44 changes: 19 additions & 25 deletions examples/multi_modal/dataloader.py
@@ -29,15 +29,12 @@ def __init__(self):
self.hyperparameters = HYPERPARAMETERS

def load_labelencoder(self):
"""
Function to load the label encoder from s3
Returns:
"""
"""Function to load the label encoder from s3."""
return joblib.load(self.hyperparameters["label_encoder_name"])

def load_tokenizer(self):
"""
load the tokenizer files and the pre-training model path from s3 specified in the hyperparameters
"""Loads the tokenizer files and the pre-training model path from s3 specified in the hyperparameters.

Returns: tokenizer
"""
# Load Bert tokenizer
@@ -62,13 +59,10 @@ def __init__(self, input_dir: Union[str, Any], hyperparameters: Union[dict, Any]
self.labelencoder = EC.load_labelencoder()

def tokenize_data(self, tokenizer, texts, max_length: int):
"""
Tokenize the text
Args:
tokenizer:
texts:
max_length:
Returns: input_ids, attention_masks
"""Tokenize the text.

Returns: input_ids, attention_masks.

"""
encoded_text = tokenizer(
texts,
@@ -98,11 +92,10 @@ class MixedDataModule(pl.LightningDataModule):
"""Own DataModule form the pytorch lightning DataModule."""

def __init__(self, hyperparameters: dict):
"""
Init if the Data Module
"""Initialize if the Data Module.

Args:
data_path: dataframe with the data
hyperparameters: Hyperparameters
hyperparameters: Hyperparameters.
"""
super().__init__()
self.hyperparameters = hyperparameters
@@ -130,10 +123,11 @@ def __init__(self, hyperparameters: dict):
)

def train_dataloader(self) -> DataLoader:
"""
Define the training dataloader
"""Define the training dataloader.

Returns:
training dataloader
training dataloader.

"""
dataset_train = DocumentClassificationDataset(
hyperparameters=self.hyperparameters,
@@ -150,10 +144,10 @@ def train_dataloader(self) -> DataLoader:
)

def val_dataloader(self) -> DataLoader:
"""
Define the validation dataloader
"""Defines the validation dataloader.

Returns:
validation dataloader
validation dataloader.
"""
dataset_val = DocumentClassificationDataset(
hyperparameters=self.hyperparameters,
Expand All @@ -169,8 +163,8 @@ def val_dataloader(self) -> DataLoader:
)

def test_dataloader(self) -> DataLoader:
"""
Define the test dataloader
"""Defines the test dataloader.

Returns:
test dataloader
"""
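The tokenize_data docstring above now only states what is returned. A short sketch of the call it likely wraps, assuming a Hugging Face BertTokenizer; the padding/truncation arguments and the hub checkpoint are assumptions for illustration, not copied from the file:

from transformers import BertTokenizer

tokenizer = BertTokenizer.from_pretrained("bert-base-cased")
texts = ["IBAN Change requested", "Damage Report filed"]

encoded_text = tokenizer(
    texts,
    padding="max_length",   # pad every sample to max_length
    truncation=True,
    max_length=64,
    return_tensors="pt",
)
input_ids = encoded_text["input_ids"]             # token ids, shape (2, 64)
attention_masks = encoded_text["attention_mask"]  # 1 for real tokens, 0 for padding
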
104 changes: 27 additions & 77 deletions examples/multi_modal/loop.py
@@ -77,7 +77,6 @@ def save_reports(self, model_dir, mode, report_confusion_matrix, report):
mode: train, test or val
report_confusion_matrix: sklearn confusion matrix
report: sklearn classification report
Returns:

"""
df_cm = pd.DataFrame(report_confusion_matrix)
Expand All @@ -87,17 +86,7 @@ def save_reports(self, model_dir, mode, report_confusion_matrix, report):
logger.info("Confusion Matrix and Classification report are saved.")

def save_test_evaluations(self, model_dir, mode, y_pred, y_true, confis, numerical_id_):
"""
Save a pandas dataframe with prediction and ground truth and identifier (numerical id) of the test dataset
Args:
model_dir:
mode:
y_pred:
y_true:
confis:
numerical_id_:
Returns:
"""
"""Save pandas dataframe with prediction and ground truth and identifier (numerical id) of the test dataset."""
df_test = pd.DataFrame()
df_test["pred"] = y_pred
df_test["confidence"] = confis.max(axis=1)
@@ -151,43 +140,37 @@ def forward(
"""Forward path, calculate the computational graph in the forward direction.

Used for train, test and val.
Args:
y: tensor with text data as tokens

Returns:
computational graph

"""
return self.module(x, y, z)

def training_step(self, batch: Dict[str, torch.Tensor]) -> Dict:
"""
Call the eval share for training
Args:
batch: tensor
"""Call the eval share for training.

Returns:
dict with loss, outputs and ground_truth
dict with loss, outputs and ground_truth.

"""
return self._shared_eval_step(batch, "train")

def validation_step(self, batch: Dict[str, torch.Tensor], batch_idx: int) -> Dict:
"""
Call the eval share for validation
Args:
batch:
batch_idx:
"""Call the eval share for validation.

Returns:
dict with loss, outputs and ground_truth
dict with loss, outputs and ground_truth.

"""
return self._shared_eval_step(batch, "val")

def test_step(self, batch: Dict[str, torch.Tensor], batch_idx: int) -> Dict:
"""
Call the eval share for test
Args:
batch:
batch_idx:
"""Call the eval share for test.

Returns:
dict with loss, outputs and ground_truth
dict with loss, outputs and ground_truth.

"""
ret = self._shared_eval_step(batch, "test")
self.pred_list.append(ret)
@@ -199,6 +182,7 @@ def _shared_eval_step(self, batch: Dict[str, torch.Tensor], mode: str) -> Dict:
Args:
batch: tensor
mode: train, test or val

Returns:
dict with loss, outputs and ground_truth

@@ -227,14 +211,8 @@ def _shared_eval_step(self, batch: Dict[str, torch.Tensor], mode: str) -> Dict:

return {"outputs": out, "loss": loss, "ground_truth": ground_truth, "numerical_id": numerical_id}

def _epoch_end(self, mode: str):
"""
Calculate loss and metricies at end of epoch
Args:
mode:
Returns:
None
"""
def _epoch_end(self, mode: str) -> None:
"""Calculate loss and metrics at end of epoch."""
if mode == "val":
output = self.val_metrics.compute()
self.log_dict(output)
Expand All @@ -249,14 +227,7 @@ def _epoch_end(self, mode: str):
self.test_metrics.reset()

def predict(self, batch: Dict[str, torch.Tensor], batch_idx: int = 0, dataloader_idx: int = 0) -> torch.Tensor:
"""Model prediction without softmax and argmax to predict class label.

Args:
outputs:
Returns:
None

"""
"""Model prediction without softmax and argmax to predict class label."""
self.eval()
with torch.no_grad():
ids = batch["ID"]
@@ -265,51 +236,30 @@ def predict(self, batch: Dict[str, torch.Tensor], batch_idx: int = 0, dataloader
return self.forward(ids, atts, img)

def on_test_epoch_end(self) -> None:
"""
Calculate the metrics at the end of epoch for test step
Args:
outputs:
Returns:
None
"""
"""Calculate the metrics at the end of epoch for test step."""
self._epoch_end("test")

def on_validation_epoch_end(self):
"""
Calculate the metrics at the end of epoch for val step
Args:
outputs:
Returns:
None
"""
def on_validation_epoch_end(self) -> None:
"""Calculate the metrics at the end of epoch for val step."""
self._epoch_end("val")

def on_train_epoch_end(self):
"""
Calculate the metrics at the end of epoch for train step
Args:
outputs:
Returns:
None
"""
def on_train_epoch_end(self) -> None:
"""Calculate the metrics at the end of epoch for train step."""
self._epoch_end("train")

def configure_optimizers(self) -> Any:
"""
Configure the optimizer
"""Configure the optimizer.

Returns:
optimizer

"""
optimizer = AdamW(self.parameters(), lr=self.learning_rate, weight_decay=self.hyperparameters["weight_decay"])
scheduler = StepLR(optimizer, step_size=1, gamma=0.1)
return [optimizer], [{"scheduler": scheduler, "interval": "epoch"}]

def configure_callbacks(self) -> Union[Sequence[pl.pytorch.Callback], pl.pytorch.Callback]:
"""Configure Early stopping or Model Checkpointing.

Returns:

"""
"""Configure Early stopping or Model Checkpointing."""
early_stop = EarlyStopping(
monitor="val_MulticlassAccuracy", patience=self.hyperparameters["patience"], mode="max"
)
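As the predict docstring notes, the method returns raw scores "without softmax and argmax"; turning them into class labels is left to the caller. A hedged usage sketch, assuming model is the LightningModule above, batch is a dict produced by the test dataloader, and label_encoder is the fitted encoder from create_labelencoder:

import torch

logits = model.predict(batch)          # raw scores, shape (batch_size, num_classes)
probs = torch.softmax(logits, dim=1)   # per-class confidences
pred_ids = probs.argmax(dim=1)         # integer class ids
labels = label_encoder.inverse_transform(pred_ids.cpu().numpy())  # e.g. "Damage Report"
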
16 changes: 7 additions & 9 deletions examples/multi_modal/model_arc.py
@@ -41,9 +41,7 @@ def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor):
"""Forward path, calculate the computational graph in the forward direction.

Used for train, test and val.
Args:
input_ids
attention_mask

Returns:
computational graph

@@ -72,9 +70,9 @@ def __init__(self, endpoint_mode: bool, hyperparameters: dict):
self.dropout = nn.Dropout(self.hyperparameters["dropout"])

def get_bert_model(self):
"""
Load the pre trained bert model weights
Returns: model
"""Load the pre-trained bert model weights.

Returns: model.
"""
model = BertModel.from_pretrained("bert-base-cased")
return BertClassifier(model)
@@ -89,9 +87,9 @@ def forward(
validation.

Args:
x (torch.Tensor): Tensor with id token
y (torch.Tensor): Tensor with attention tokens.
z (torch.Tensor): Tensor with image.
x: Tensor with id token
y: Tensor with attention tokens.
z: Tensor with image.

Returns:
torch.Tensor: The output tensor representing the computational graph.
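The corrected forward docstring now spells out the three inputs: x holds the id tokens, y the attention tokens, and z the image. A minimal call sketch under those assumptions, where model is taken to be an instance of the multi-modal module defined in this file and all tensor shapes are illustrative only:

import torch

input_ids = torch.randint(0, 1000, (4, 64))            # x: token ids (values below the BERT vocab size)
attention_mask = torch.ones(4, 64, dtype=torch.long)   # y: attention tokens, all positions unmasked
images = torch.rand(4, 3, 224, 224)                    # z: image batch; the resolution is an assumption

logits = model(input_ids, attention_mask, images)      # forward(x, y, z) -> output of the computational graph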