Skip to content

Commit

Permalink
fixes #64: fix inconsistency between segments when some segments have empty text
Browse files Browse the repository at this point in the history
  • Loading branch information
Jeronymous committed Feb 25, 2024
1 parent 6197f08 commit cf576e5
Showing 1 changed file with 3 additions and 0 deletions.
3 changes: 3 additions & 0 deletions whisper_timestamped/transcribe.py
Original file line number Diff line number Diff line change
Expand Up @@ -905,6 +905,9 @@ def filter_tokens(tokens):
assert len(segment_logprobs) == len(segment_tokens), f"Inconsistent number of segments: logprobs ({len(segment_logprobs)}) != tokens ({len(segment_tokens)})"

whisper_segments = transcription["segments"]
# See issue 64: some segments may have empty text
if any(not s["text"] for s in whisper_segments):
whisper_segments = [s for s in whisper_segments if s["text"]]
l1 = len(whisper_segments)
l2 = len(timestamped_word_segments)
if l1 != l2 and l1 != 0:
Expand Down

0 comments on commit cf576e5

Please sign in to comment.