diff --git a/nemo/collections/vlm/mllama/data/lazy.py b/nemo/collections/vlm/mllama/data/lazy.py
index 8e4998de713d..30b8b2ea9d9c 100644
--- a/nemo/collections/vlm/mllama/data/lazy.py
+++ b/nemo/collections/vlm/mllama/data/lazy.py
@@ -16,16 +16,14 @@
 import logging
 import os
 import re
-from typing import Any, Dict, List, Sequence
-from typing import Optional
+from typing import Any, Dict, List, Optional, Sequence
 
 import pytorch_lightning as pl
 import torch
 import torch.nn.functional as F
 from pytorch_lightning.utilities.types import EVAL_DATALOADERS, TRAIN_DATALOADERS
 from torch.utils import data
-from torch.utils.data import DataLoader
-from torch.utils.data import default_collate
+from torch.utils.data import DataLoader, default_collate
 
 from nemo.collections.nlp.modules.common.megatron.utils import get_ltor_masks_and_position_ids
 from nemo.collections.vlm.mllama.model.utils import create_vision_mask_tensor
@@ -38,12 +36,12 @@ class MLlamaDataset(LazySupervisedDataset):
     """Dataset for supervised fine-tuning."""
 
     def __init__(
-            self,
-            data_path,
-            data_config,
-            tokenizer,
-            image_processor,
-            sequence_length,
+        self,
+        data_path,
+        data_config,
+        tokenizer,
+        image_processor,
+        sequence_length,
     ):
 
         if data_path.endswith(".json"):
@@ -174,24 +172,24 @@ def collate_fn(self, instances: Sequence[Dict]) -> Dict[str, torch.Tensor]:
 
 class MLlamaLazyDataModule(pl.LightningDataModule):
     def __init__(
-            self,
-            paths: str|List[str],
-            weights: Optional[List[float]] = None,
-            data_config: Optional[DataConfig] = ImageDataConfig,
-            seq_length: int = 2048,
-            decoder_seq_length: Optional[int] = None,
-            tokenizer: Optional = None,
-            image_processor: Optional = None,
-            micro_batch_size: int = 4,
-            global_batch_size: int = 8,
-            num_train_samples: int = 10_000,
-            num_val_samples: int = 10_000,
-            num_test_samples: int = 10_000,
-            num_workers: int = 8,
-            pin_memory: bool = True,
-            persistent_workers: bool = False,
-            use_packed_sequence: bool = False,
-            seed: int = 1234,
+        self,
+        paths: str | List[str],
+        weights: Optional[List[float]] = None,
+        data_config: Optional[DataConfig] = ImageDataConfig,
+        seq_length: int = 2048,
+        decoder_seq_length: Optional[int] = None,
+        tokenizer: Optional = None,
+        image_processor: Optional = None,
+        micro_batch_size: int = 4,
+        global_batch_size: int = 8,
+        num_train_samples: int = 10_000,
+        num_val_samples: int = 10_000,
+        num_test_samples: int = 10_000,
+        num_workers: int = 8,
+        pin_memory: bool = True,
+        persistent_workers: bool = False,
+        use_packed_sequence: bool = False,
+        seed: int = 1234,
     ) -> None:
         super().__init__()
         if not isinstance(paths, (list, tuple)):
diff --git a/nemo/collections/vlm/neva/data/lazy.py b/nemo/collections/vlm/neva/data/lazy.py
index d0143cffc0c8..57aa5b408835 100644
--- a/nemo/collections/vlm/neva/data/lazy.py
+++ b/nemo/collections/vlm/neva/data/lazy.py
@@ -17,8 +17,7 @@
 import os
 import re
 import tarfile
-from typing import Any, Dict, List, Sequence
-from typing import Optional
+from typing import Any, Dict, List, Optional, Sequence
 
 import decord
 import numpy as np
@@ -28,8 +27,7 @@
 from PIL import Image
 from pytorch_lightning.utilities.types import EVAL_DATALOADERS, TRAIN_DATALOADERS
 from torch.utils import data
-from torch.utils.data import DataLoader
-from torch.utils.data import Dataset, default_collate
+from torch.utils.data import DataLoader, Dataset, default_collate
 from transformers import CLIPImageProcessor, SiglipImageProcessor
 
 from nemo.collections.nlp.modules.common.megatron.utils import get_ltor_masks_and_position_ids
@@ -239,7 +237,7 @@ def find_pattern_indices(template, pattern, search_start_index=0, allow_first_to
     template_len = len(template)
     pattern_len = len(pattern)
     for i in range(search_start_index, template_len - pattern_len + 1):
-        match = template[i: i + pattern_len] == pattern
+        match = template[i : i + pattern_len] == pattern
         if torch.all(match) or (allow_first_token_mismatch and torch.all(match[1:])):
             return i, i + pattern_len
     return -1, -1
@@ -248,12 +246,12 @@ def find_pattern_indices(template, pattern, search_start_index=0, allow_first_to
 
 class LazySupervisedDataset(Dataset):
     def __init__(
-            self,
-            data_path,
-            data_config,
-            tokenizer,
-            image_processor,
-            sequence_length,
+        self,
+        data_path,
+        data_config,
+        tokenizer,
+        image_processor,
+        sequence_length,
     ):
         super().__init__()
         if data_path is not None:
@@ -352,7 +350,7 @@ def _tokenize_and_label(self, conversations):
         for i in range(1, len(self.conv.messages), 2):
             stop_str = getattr(self.conv, "stop_str", None)
             assert (
-                    stop_str is not None
+                stop_str is not None
             ), "If `stop_str` is not provided, issues might occur in labeling the answer tokens."
             answer_tokens = self.tokenizer.encode(
                 self.conv.messages[i][1] + ("" if stop_str is None else stop_str),
@@ -378,11 +376,11 @@ class NevaDataset(LazySupervisedDataset):
     """Dataset for supervised fine-tuning."""
 
     def __init__(
-            self,
-            data_path,
-            data_config,
-            tokenizer,
-            image_processor,
+        self,
+        data_path,
+        data_config,
+        tokenizer,
+        image_processor,
     ):
 
         if data_path.endswith(".json"):
@@ -494,23 +492,23 @@ def collate_fn(self, instances: Sequence[Dict]) -> Dict[str, torch.Tensor]:
 
 class NevaLazyDataModule(pl.LightningDataModule):
     def __init__(
-            self,
-            paths: str|List[str],
-            weights: Optional[List[float]] = None,
-            data_config: Optional[DataConfig] = ImageDataConfig,
-            seq_length: int = 2048,
-            tokenizer: Optional = None,
-            image_processor: Optional = None,
-            micro_batch_size: int = 4,
-            global_batch_size: int = 8,
-            num_train_samples: int = 10_000,
-            num_val_samples: int = 10_000,
-            num_test_samples: int = 10_000,
-            num_workers: int = 8,
-            pin_memory: bool = True,
-            persistent_workers: bool = False,
-            use_packed_sequence: bool = False,
-            seed: int = 1234,
+        self,
+        paths: str | List[str],
+        weights: Optional[List[float]] = None,
+        data_config: Optional[DataConfig] = ImageDataConfig,
+        seq_length: int = 2048,
+        tokenizer: Optional = None,
+        image_processor: Optional = None,
+        micro_batch_size: int = 4,
+        global_batch_size: int = 8,
+        num_train_samples: int = 10_000,
+        num_val_samples: int = 10_000,
+        num_test_samples: int = 10_000,
+        num_workers: int = 8,
+        pin_memory: bool = True,
+        persistent_workers: bool = False,
+        use_packed_sequence: bool = False,
+        seed: int = 1234,
     ) -> None:
         super().__init__()
         if not isinstance(paths, (list, tuple)):