diff --git a/nemo/collections/vlm/mllama/data/lazy.py b/nemo/collections/vlm/mllama/data/lazy.py
index 8e4998de713d..30b8b2ea9d9c 100644
--- a/nemo/collections/vlm/mllama/data/lazy.py
+++ b/nemo/collections/vlm/mllama/data/lazy.py
@@ -16,16 +16,14 @@
 import logging
 import os
 import re
-from typing import Any, Dict, List, Sequence
-from typing import Optional
+from typing import Any, Dict, List, Optional, Sequence
 
 import pytorch_lightning as pl
 import torch
 import torch.nn.functional as F
 from pytorch_lightning.utilities.types import EVAL_DATALOADERS, TRAIN_DATALOADERS
 from torch.utils import data
-from torch.utils.data import DataLoader
-from torch.utils.data import default_collate
+from torch.utils.data import DataLoader, default_collate
 
 from nemo.collections.nlp.modules.common.megatron.utils import get_ltor_masks_and_position_ids
 from nemo.collections.vlm.mllama.model.utils import create_vision_mask_tensor
@@ -38,12 +36,12 @@ class MLlamaDataset(LazySupervisedDataset):
     """Dataset for supervised fine-tuning."""
 
     def __init__(
-            self,
-            data_path,
-            data_config,
-            tokenizer,
-            image_processor,
-            sequence_length,
+        self,
+        data_path,
+        data_config,
+        tokenizer,
+        image_processor,
+        sequence_length,
     ):
 
         if data_path.endswith(".json"):
@@ -174,24 +172,24 @@ def collate_fn(self, instances: Sequence[Dict]) -> Dict[str, torch.Tensor]:
 
 class MLlamaLazyDataModule(pl.LightningDataModule):
     def __init__(
-            self,
-            paths: str|List[str],
-            weights: Optional[List[float]] = None,
-            data_config: Optional[DataConfig] = ImageDataConfig,
-            seq_length: int = 2048,
-            decoder_seq_length: Optional[int] = None,
-            tokenizer: Optional = None,
-            image_processor: Optional = None,
-            micro_batch_size: int = 4,
-            global_batch_size: int = 8,
-            num_train_samples: int = 10_000,
-            num_val_samples: int = 10_000,
-            num_test_samples: int = 10_000,
-            num_workers: int = 8,
-            pin_memory: bool = True,
-            persistent_workers: bool = False,
-            use_packed_sequence: bool = False,
-            seed: int = 1234,
+        self,
+        paths: str | List[str],
+        weights: Optional[List[float]] = None,
+        data_config: Optional[DataConfig] = ImageDataConfig,
+        seq_length: int = 2048,
+        decoder_seq_length: Optional[int] = None,
+        tokenizer: Optional = None,
+        image_processor: Optional = None,
+        micro_batch_size: int = 4,
+        global_batch_size: int = 8,
+        num_train_samples: int = 10_000,
+        num_val_samples: int = 10_000,
+        num_test_samples: int = 10_000,
+        num_workers: int = 8,
+        pin_memory: bool = True,
+        persistent_workers: bool = False,
+        use_packed_sequence: bool = False,
+        seed: int = 1234,
     ) -> None:
         super().__init__()
         if not isinstance(paths, (list, tuple)):
diff --git a/nemo/collections/vlm/neva/data/lazy.py b/nemo/collections/vlm/neva/data/lazy.py
index d0143cffc0c8..57aa5b408835 100644
--- a/nemo/collections/vlm/neva/data/lazy.py
+++ b/nemo/collections/vlm/neva/data/lazy.py
@@ -17,8 +17,7 @@
 import os
 import re
 import tarfile
-from typing import Any, Dict, List, Sequence
-from typing import Optional
+from typing import Any, Dict, List, Optional, Sequence
 
 import decord
 import numpy as np
@@ -28,8 +27,7 @@
 from PIL import Image
 from pytorch_lightning.utilities.types import EVAL_DATALOADERS, TRAIN_DATALOADERS
 from torch.utils import data
-from torch.utils.data import DataLoader
-from torch.utils.data import Dataset, default_collate
+from torch.utils.data import DataLoader, Dataset, default_collate
 from transformers import CLIPImageProcessor, SiglipImageProcessor
 
 from nemo.collections.nlp.modules.common.megatron.utils import get_ltor_masks_and_position_ids
@@ -239,7 +237,7 @@ def find_pattern_indices(template, pattern, search_start_index=0, allow_first_to
     template_len = len(template)
     pattern_len = len(pattern)
     for i in range(search_start_index, template_len - pattern_len + 1):
-        match = template[i: i + pattern_len] == pattern
+        match = template[i : i + pattern_len] == pattern
         if torch.all(match) or (allow_first_token_mismatch and torch.all(match[1:])):
             return i, i + pattern_len
     return -1, -1
@@ -248,12 +246,12 @@ def find_pattern_indices(template, pattern, search_start_index=0, allow_first_to
 
 class LazySupervisedDataset(Dataset):
     def __init__(
-            self,
-            data_path,
-            data_config,
-            tokenizer,
-            image_processor,
-            sequence_length,
+        self,
+        data_path,
+        data_config,
+        tokenizer,
+        image_processor,
+        sequence_length,
     ):
         super().__init__()
         if data_path is not None:
@@ -352,7 +350,7 @@ def _tokenize_and_label(self, conversations):
         for i in range(1, len(self.conv.messages), 2):
             stop_str = getattr(self.conv, "stop_str", None)
             assert (
-                    stop_str is not None
+                stop_str is not None
             ), "If `stop_str` is not provided, issues might occur in labeling the answer tokens."
             answer_tokens = self.tokenizer.encode(
                 self.conv.messages[i][1] + ("" if stop_str is None else stop_str),
@@ -378,11 +376,11 @@ class NevaDataset(LazySupervisedDataset):
     """Dataset for supervised fine-tuning."""
 
     def __init__(
-            self,
-            data_path,
-            data_config,
-            tokenizer,
-            image_processor,
+        self,
+        data_path,
+        data_config,
+        tokenizer,
+        image_processor,
     ):
 
         if data_path.endswith(".json"):
@@ -494,23 +492,23 @@ def collate_fn(self, instances: Sequence[Dict]) -> Dict[str, torch.Tensor]:
 
 class NevaLazyDataModule(pl.LightningDataModule):
     def __init__(
-            self,
-            paths: str|List[str],
-            weights: Optional[List[float]] = None,
-            data_config: Optional[DataConfig] = ImageDataConfig,
-            seq_length: int = 2048,
-            tokenizer: Optional = None,
-            image_processor: Optional = None,
-            micro_batch_size: int = 4,
-            global_batch_size: int = 8,
-            num_train_samples: int = 10_000,
-            num_val_samples: int = 10_000,
-            num_test_samples: int = 10_000,
-            num_workers: int = 8,
-            pin_memory: bool = True,
-            persistent_workers: bool = False,
-            use_packed_sequence: bool = False,
-            seed: int = 1234,
+        self,
+        paths: str | List[str],
+        weights: Optional[List[float]] = None,
+        data_config: Optional[DataConfig] = ImageDataConfig,
+        seq_length: int = 2048,
+        tokenizer: Optional = None,
+        image_processor: Optional = None,
+        micro_batch_size: int = 4,
+        global_batch_size: int = 8,
+        num_train_samples: int = 10_000,
+        num_val_samples: int = 10_000,
+        num_test_samples: int = 10_000,
+        num_workers: int = 8,
+        pin_memory: bool = True,
+        persistent_workers: bool = False,
+        use_packed_sequence: bool = False,
+        seed: int = 1234,
     ) -> None:
         super().__init__()
         if not isinstance(paths, (list, tuple)):