dice-group · Demirrr · Aug 21, 2024 · Aug 20, 2024 · Aug 20, 2024
diff --git a/dicee/read_preprocess_save_load_kg/util.py b/dicee/read_preprocess_save_load_kg/util.py
@@ -77,7 +77,7 @@ def read_with_polars(data_path, read_only_few: int = None, sample_triples_ratio:
 @timeit
 def read_with_pandas(data_path, read_only_few: int = None, sample_triples_ratio: float = None):
     print(f'*** Reading {data_path} with Pandas ***')
-    if data_path[-3:] in ["ttl", 'txt', 'csv', 'zst']:
+    if data_path[-3:] in [".nt","ttl", 'txt', 'csv', 'zst']:
         print('Reading with pandas.read_csv with sep ** s+ ** ...')
         # TODO: if byte_pair_encoding=True, we should not use "\s+" as separator I guess
         df = pd.read_csv(data_path,

diff --git a/dicee/trainer/torch_trainer.py b/dicee/trainer/torch_trainer.py
@@ -4,6 +4,7 @@
 import time
 import os
 import psutil
+from tqdm import tqdm
 
 
 class TorchTrainer(AbstractTrainer):
@@ -81,6 +82,7 @@ def _run_epoch(self, epoch: int) -> float:
             # (2) Forward-Backward-Update.
             batch_loss = self._run_batch(i, x_batch, y_batch)
             epoch_loss += batch_loss
+            """
             if construct_mini_batch_time:
                 print(
                     f"Epoch:{epoch + 1} "
@@ -97,6 +99,8 @@ def _run_epoch(self, epoch: int) -> float:
                     f"| Loss:{batch_loss} "
                     f"| ForwardBackwardUpdate:{(time.time() - start_time):.2f}secs "
                     f"| Mem. Usage {self.process.memory_info().rss / 1_000_000: .5}MB ")
+            """
+
             construct_mini_batch_time = time.time()
         return epoch_loss / (i + 1)
 
@@ -130,14 +134,20 @@ def fit(self, *args, train_dataloaders, **kwargs) -> None:
               f'| LearningRate:{self.model.learning_rate} '
               f'| BatchSize:{self.train_dataloaders.batch_size} '
               f'| EpochBatchsize:{len(train_dataloaders)}')
-        for epoch in range(self.attributes.max_epochs):
+
+        for epoch in (tqdm_bar := tqdm(range(self.attributes.max_epochs))):
             start_time = time.time()
 
             avg_epoch_loss = self._run_epoch(epoch)
+            tqdm_bar.set_description_str(f"Epoch:{epoch + 1} " f"| Loss:{avg_epoch_loss:.8f} "f"| Runtime:{(time.time() - start_time) / 60:.3f} mins")
+
+            """
             print(f"Epoch:{epoch + 1} "
                   f"| Loss:{avg_epoch_loss:.8f} "
                   f"| Runtime:{(time.time() - start_time) / 60:.3f} mins")
             """
+
+            """
             # Autobatch Finder: Double the current batch size if memory allows and repeat this process at most 5 times.
             if self.attributes.auto_batch_finder and psutil.virtual_memory().percent < 30.0 and counter < 5:
                 self.train_dataloaders = DataLoader(dataset=self.train_dataloaders.dataset,