From 49d4afe3db645bd7ae99519bcefd24120cc99ab9 Mon Sep 17 00:00:00 2001
From: bhavnicksm <bhavnicksm@gmail.com>
Date: Tue, 7 Jan 2025 03:31:58 +0530
Subject: [PATCH] [fix] Progress bar style to ' >='; chunked dispatch in batch multiprocessing

---
 src/chonkie/chunker/base.py  | 29 ++++++++++++++++++-----------
 src/chonkie/chunker/token.py |  4 ++--
 2 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/src/chonkie/chunker/base.py b/src/chonkie/chunker/base.py
index 322adcd..35ab3ab 100644
--- a/src/chonkie/chunker/base.py
+++ b/src/chonkie/chunker/base.py
@@ -249,24 +249,31 @@ def _process_batch_sequential(self,
                     desc="🦛 CHONKING",
                     disable=not show_progress_bar,
                     unit="texts",
-                    bar_format="{desc}: |{bar:20}| {percentage:3.0f}% • {n_fmt}/{total_fmt} texts chunked [{elapsed}<{remaining}, {rate_fmt}] 🌱",
-                    ascii=' ▏▎▍▌▋▊▉'
-                )
-            ]
+                    bar_format="{desc}: [{bar:20}] {percentage:3.0f}% • {n_fmt}/{total_fmt} texts chunked [{elapsed}<{remaining}, {rate_fmt}] 🌱", 
+                    ascii=' >=')
+        ]
     
     def _process_batch_multiprocessing(self,
                                      texts: List[str],
                                      show_progress_bar: bool = True) -> List[List[Chunk]]:
         """Process a batch of texts using multiprocessing."""
         num_workers = self._determine_optimal_workers()
+        total = len(texts)
+        chunksize = max(1, min(total // (num_workers * 16), 10))  # 1-10 texts per task, targeting ~16 tasks per worker
+
         with Pool(processes=num_workers) as pool:
-            return list(tqdm(pool.imap(self.chunk, texts),
-                             desc="🦛 CHONKING",
-                             disable=not show_progress_bar,
-                             unit="texts",
-                             bar_format="{desc}: |{bar:20}| {percentage:3.0f}% • {n_fmt}/{total_fmt} texts chunked [{elapsed}<{remaining}, {rate_fmt}] 🌱",
-                             ascii=' ▏▎▍▌▋▊▉'))
-    
+            results = []  # imap (unlike imap_unordered) yields in input order, so results[i] matches texts[i]
+            with tqdm(total=total,
+                     desc="🦛 CHONKING",
+                     disable=not show_progress_bar,
+                     unit="texts",
+                     bar_format="{desc}: [{bar:20}] {percentage:3.0f}% • {n_fmt}/{total_fmt} texts chunked [{elapsed}<{remaining}, {rate_fmt}] 🌱",
+                     ascii=' >=') as pbar:
+                for result in pool.imap(self.chunk, texts, chunksize=chunksize):
+                    results.append(result)
+                    pbar.update()
+            return results
+        
     def chunk_batch(
         self,
         texts: List[str],
diff --git a/src/chonkie/chunker/token.py b/src/chonkie/chunker/token.py
index 98391d6..3816402 100644
--- a/src/chonkie/chunker/token.py
+++ b/src/chonkie/chunker/token.py
@@ -194,8 +194,8 @@ def chunk_batch(
                         desc="🦛 CHONKING",
                         disable=not show_progress_bar, 
                         unit="batches",
-                        ascii=" ▏▎▍▌▋▊▉",
-                        bar_format="{desc}: |{bar:20}| {percentage:3.0f}% • {n_fmt}/{total_fmt} batches chunked [{elapsed}<{remaining}, {rate_fmt}] 🌱"):
+                        bar_format="{desc}: [{bar:20}] {percentage:3.0f}% • {n_fmt}/{total_fmt} batches chunked [{elapsed}<{remaining}, {rate_fmt}] 🌱",
+                        ascii=' >='):
             batch_texts = texts[i : min(i + batch_size, len(texts))]
             chunks.extend(self._process_text_batch(batch_texts))
         return chunks