Skip to content

Commit

Permalink
[fix] Progress bar style to ' >='
Browse files Browse the repository at this point in the history
  • Loading branch information
bhavnicksm committed Jan 6, 2025
1 parent ba7caae commit 49d4afe
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 13 deletions.
29 changes: 18 additions & 11 deletions src/chonkie/chunker/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,24 +249,31 @@ def _process_batch_sequential(self,
desc="🦛 CHONKING",
disable=not show_progress_bar,
unit="texts",
bar_format="{desc}: |{bar:20}| {percentage:3.0f}% • {n_fmt}/{total_fmt} texts chunked [{elapsed}<{remaining}, {rate_fmt}] 🌱",
ascii=' ▏▎▍▌▋▊▉'
)
]
bar_format="{desc}: [{bar:20}] {percentage:3.0f}% • {n_fmt}/{total_fmt} texts chunked [{elapsed}<{remaining}, {rate_fmt}] 🌱",
ascii=' >=')
]

def _process_batch_multiprocessing(self,
texts: List[str],
show_progress_bar: bool = True) -> List[List[Chunk]]:
"""Process a batch of texts using multiprocessing."""
num_workers = self._determine_optimal_workers()
total = len(texts)
chunksize = max(1, min(total // (num_workers * 16), 10)) # Optimize chunk size

with Pool(processes=num_workers) as pool:
return list(tqdm(pool.imap(self.chunk, texts),
desc="🦛 CHONKING",
disable=not show_progress_bar,
unit="texts",
bar_format="{desc}: |{bar:20}| {percentage:3.0f}% • {n_fmt}/{total_fmt} texts chunked [{elapsed}<{remaining}, {rate_fmt}] 🌱",
ascii=' ▏▎▍▌▋▊▉'))

results = []
with tqdm(total=total,
desc="🦛 CHONKING",
disable=not show_progress_bar,
unit="texts",
bar_format="{desc}: [{bar:20}] {percentage:3.0f}% • {n_fmt}/{total_fmt} texts chunked [{elapsed}<{remaining}, {rate_fmt}] 🌱",
ascii=' >=') as pbar:
for result in pool.imap_unordered(self.chunk, texts, chunksize=chunksize):
results.append(result)
pbar.update()
return results

def chunk_batch(
self,
texts: List[str],
Expand Down
4 changes: 2 additions & 2 deletions src/chonkie/chunker/token.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,8 +194,8 @@ def chunk_batch(
desc="🦛 CHONKING",
disable=not show_progress_bar,
unit="batches",
ascii=" ▏▎▍▌▋▊▉",
bar_format="{desc}: |{bar:20}| {percentage:3.0f}% • {n_fmt}/{total_fmt} batches chunked [{elapsed}<{remaining}, {rate_fmt}] 🌱"):
bar_format="{desc}: [{bar:20}] {percentage:3.0f}% • {n_fmt}/{total_fmt} batches chunked [{elapsed}<{remaining}, {rate_fmt}] 🌱",
ascii=' >='):
batch_texts = texts[i : min(i + batch_size, len(texts))]
chunks.extend(self._process_text_batch(batch_texts))
return chunks
Expand Down

0 comments on commit 49d4afe

Please sign in to comment.