Skip to content

Commit

Permalink
[hotfix] Fix speaker assignment bug
Browse files Browse the repository at this point in the history
Speaker assignment treated the result as a list when it was actually a
dictionary.
  • Loading branch information
chrisbrickhouse committed Jan 30, 2024
1 parent 6fa660a commit 0f89a62
Showing 1 changed file with 16 additions and 12 deletions.
28 changes: 16 additions & 12 deletions pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,17 +94,21 @@ def assign_speakers(
result_segments = assign_word_speakers(
diarization_result, aligned_segments
)
results_segments_w_speakers: List[Dict[str, Any]] = []
for result_segment in result_segments:
results_segments_w_speakers.append(
{
"start": result_segment["start"],
"end": result_segment["end"],
"text": result_segment["text"],
"speaker": result_segment["speaker"],
}
)
return results_segments_w_speakers
# Upstream uses the code below, but it is buggy, and I think upstream's upstream has since
# adopted the output format that it tries to create, making it redundant.
#
#results_segments_w_speakers: List[Dict[str, Any]] = []
#for result_segment in result_segments['segments']:
# results_segments_w_speakers.append(
# {
# "start": result_segment["start"],
# "end": result_segment["end"],
# "text": result_segment["text"],
# "speaker": result_segment["speaker"],
# "words": result_segment["words"]
# }
# )
return result_segments

def transcribe_and_diarize(
audio_file: str,
Expand Down Expand Up @@ -133,7 +137,7 @@ def transcribe_and_diarize(
results_segments_w_speakers = assign_speakers(diarization_result, aligned_segments)

# Print the results in a user-friendly way
for i, segment in enumerate(results_segments_w_speakers):
for i, segment in enumerate(results_segments_w_speakers['segments']):
print(f"Segment {i + 1}:")
print(f"Start time: {segment['start']:.2f}")
print(f"End time: {segment['end']:.2f}")
Expand Down

0 comments on commit 0f89a62

Please sign in to comment.