diff --git a/formatter.py b/formatter.py
index b35963b..50d42ed 100644
--- a/formatter.py
+++ b/formatter.py
@@ -12,18 +12,21 @@
 #
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
+import warnings
+
 import textgrid
 
 class Formatter():
     def __init__(self):
         pass
 
-    def to_TextGrid(self, diarized_transcription):
+    def to_TextGrid(self, diarized_transcription, by_phrase=True):
         """
         Convert a diarized transcription dictionary to a TextGrid
 
         Args:
             diarized_transcription: Output of pipeline.assign_speakers()
+            by_phrase: Flag for whether the intervals should be by phrase (True) or word (False)
 
         Returns:
             A textgrid.TextGrid object populated with the diarized and
@@ -34,29 +37,44 @@ def to_TextGrid(self, diarized_transcription):
         maxTime = diarized_transcription['segments'][-1]['end']
         tg = textgrid.TextGrid(minTime=minTime,maxTime=maxTime)
 
-        speakers = [x['speaker'] for x in diarized_transcription['segments']]
+        speakers = [x['speaker'] for x in diarized_transcription['segments'] if 'speaker' in x]
         for speaker in set(speakers):
             tg.append(textgrid.IntervalTier(name=speaker,minTime=minTime,maxTime=maxTime))
         # Create a lookup table of tier indices based on the given speaker name
         tier_key = dict((name,index) for index, name in enumerate([x.name for x in tg.tiers]))
 
-        for segment in diarized_transcription['segments']:
+        for i in range(len(diarized_transcription['segments'])):
+            segment = diarized_transcription['segments'][i]
             # There's no guarantee, weirdly, that a given word's assigned speaker
             # is the same as the speaker assigned to the whole segment. Since
             # the tiers are based on assigned /segment/ speakers, not assigned 
             # word speakers, we need to look up the tier in the segment loop
             # not in the word loop. See Issue #7
+            if 'speaker' not in segment:
+                warnings.warn('No speaker for segment')
+                #print(segment)
+                continue
             tier_index = tier_key[segment['speaker']]
             tier = tg.tiers[tier_index]
             minTime = segment['start']
-            maxTime = segment['end']
+            if i+1 == len(diarized_transcription['segments']):
+                maxTime = segment['end']
+            else:
+                maxTime = diarized_transcription['segments'][i+1]['start']
             mark = segment['text']
-            tier.add(minTime,maxTime,mark)
-            # In testing, the word-level alignments are not very good. A future version
-            # might want to add an option for end users to enable the following loop.
-            #for word in segment['words']:
-            #    minTime = word['start']
-            #    maxTime = word['end']
-            #    mark = word['word']
-            #    tier.add(minTime,maxTime,mark)
+            if by_phrase:
+                tier.add(minTime,maxTime,mark)
+                continue
+            for word in segment['words']:
+                if 'speaker' not in word:
+                    warnings.warn('No speaker assigned to word, using phrase-level speaker')
+                elif word['speaker'] != segment['speaker']:
+                    warnings.warn('Mismatched speaker for word and phrase, using phrase-level speaker')
+                    #print(word['speaker'],word)
+                    #print(segment['speaker'],segment)
+                    #raise ValueError('Word and segment have different speakers')
+                minTime = word['start']
+                maxTime = word['end']
+                mark = word['text']
+                tier.add(minTime,maxTime,mark)
         return tg
diff --git a/pipeline.py b/pipeline.py
index 0af857d..4a421c1 100644
--- a/pipeline.py
+++ b/pipeline.py
@@ -12,11 +12,16 @@
 import psutil
 import GPUtil
 import matplotlib.pyplot as plt
-import whisper
+import whisper_timestamped as whisper
 from whisperx import load_align_model, align
 from whisperx.diarize import DiarizationPipeline, assign_word_speakers
 
-def transcribe(audio_file: str, model_name: str, device: str = "cpu") -> Dict[str, Any]:
+def transcribe(
+        audio_file: str, 
+        model_name: str, 
+        device: str = "cpu",
+        detect_disfluencies: bool = True
+        ) -> Dict[str, Any]:
     """
     Transcribe an audio file using a whisper model.
 
@@ -24,14 +29,16 @@ def transcribe(audio_file: str, model_name: str, device: str = "cpu") -> Dict[st
         audio_file: Path to the audio file to transcribe.
         model_name: Name of the model to use for transcription.
         device: The device to use for inference (e.g., "cpu" or "cuda").
+        detect_disfluencies: Flag for whether the transcription should include disfluencies, marked with [*]
 
     Returns:
         A dictionary representing the transcript segments and language code.
     """
-    model = whisper.load_model(model_name, device)
-    result = model.transcribe(audio_file)
+    model = whisper.load_model(model_name, device=device)
+    audio = whisper.load_audio(audio_file)  # decode once; the waveform is passed to transcribe below
+    result = whisper.transcribe(model, audio, detect_disfluencies=detect_disfluencies)
 
-    language_code = result["language"]
+    language_code = result['language']
     return {
         "segments": result["segments"],
         "language_code": language_code,
@@ -130,11 +137,11 @@ def transcribe_and_diarize(
         spoken text, and the speaker ID.
     """
     transcript = transcribe(audio_file, model_name, device)
-    aligned_segments = align_segments(
-        transcript["segments"], transcript["language_code"], audio_file, device
-    )
+    #aligned_segments = align_segments(
+    #    transcript["segments"], transcript["language_code"], audio_file, device
+    #)
     diarization_result = diarize(audio_file, hf_token)
-    results_segments_w_speakers = assign_speakers(diarization_result, aligned_segments)
+    results_segments_w_speakers = assign_speakers(diarization_result, transcript)
 
     # Print the results in a user-friendly way
     for i, segment in enumerate(results_segments_w_speakers['segments']):
diff --git a/requirements.txt b/requirements.txt
index b18a8e2..4b32241 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
-openai-whisper @ git+https://github.com/openai/whisper.git@b38a1f20f4b23f3f3099af2c3e0ca95627276ddf
 whisperx @ git+https://github.com/m-bain/whisperx.git@49e0130e4e0c0d99d60715d76e65a71826a97109
+whisper_timestamped
 GPUtil
 psutil
 textgrid
diff --git a/tests/data/TestAudio_SnoopDogg_85SouthMedia_WhisperTimestampSegments.json b/tests/data/TestAudio_SnoopDogg_85SouthMedia_WhisperTimestampSegments.json
new file mode 100644
index 0000000..57b59e4
--- /dev/null
+++ b/tests/data/TestAudio_SnoopDogg_85SouthMedia_WhisperTimestampSegments.json
@@ -0,0 +1 @@
+{"segments": [{"id": 0, "seek": 0, "start": 0.22, "end": 3.44, "text": " So, you know the pimpin', fuck y'all.", "tokens": [50363, 1406, 11, 345, 760, 262, 279, 11011, 259, 3256, 5089, 331, 6, 439, 13, 50543], "temperature": 0.0, "avg_logprob": -0.2396322811351103, "compression_ratio": 1.6773162939297124, "no_speech_prob": 0.06969249993562698, "confidence": 0.87, "words": [{"text": "So,", "start": 0.22, "end": 0.6, "confidence": 0.88, "speaker": "SPEAKER_00"}, {"text": "you", "start": 0.66, "end": 1.2, "confidence": 0.986, "speaker": "SPEAKER_00"}, {"text": "know", "start": 1.2, "end": 1.34, "confidence": 0.997, "speaker": "SPEAKER_00"}, {"text": "the", "start": 1.34, "end": 1.46, "confidence": 0.656, "speaker": "SPEAKER_00"}, {"text": "pimpin',", "start": 1.46, "end": 2.6, "confidence": 0.877, "speaker": "SPEAKER_00"}, {"text": "[*]", "start": 2.6, "end": 2.84, "confidence": 0.0, "speaker": "SPEAKER_00"}, {"text": "fuck", "start": 2.84, "end": 2.98, "confidence": 0.79, "speaker": "SPEAKER_00"}, {"text": "y'all.", "start": 2.98, "end": 3.44, "confidence": 0.895, "speaker": "SPEAKER_00"}], "speaker": "SPEAKER_00"}, {"id": 1, "seek": 0, "start": 3.96, "end": 5.28, "text": " I'm finna go over to Def Jam", "tokens": [50543, 314, 1101, 957, 2616, 467, 625, 284, 2896, 9986, 50658], "temperature": 0.0, "avg_logprob": -0.2396322811351103, "compression_ratio": 1.6773162939297124, "no_speech_prob": 0.06969249993562698, "confidence": 0.868, "words": [{"text": "I'm", "start": 3.96, "end": 4.18, "confidence": 0.984, "speaker": "SPEAKER_00"}, {"text": "finna", "start": 4.18, "end": 4.3, "confidence": 0.924, "speaker": "SPEAKER_00"}, {"text": "go", "start": 4.3, "end": 4.44, "confidence": 0.964, "speaker": "SPEAKER_00"}, {"text": "over", "start": 4.44, "end": 4.58, "confidence": 0.891, "speaker": "SPEAKER_00"}, {"text": "to", "start": 4.58, "end": 4.7, "confidence": 0.847, "speaker": "SPEAKER_00"}, {"text": "Def", "start": 4.7, "end": 4.94, "confidence": 0.485, "speaker": 
"SPEAKER_00"}, {"text": "Jam", "start": 4.94, "end": 5.28, "confidence": 0.964, "speaker": "SPEAKER_00"}], "speaker": "SPEAKER_00"}, {"id": 2, "seek": 0, "start": 6.24, "end": 7.54, "text": " and learn a little bit of corporate work,", "tokens": [50658, 290, 2193, 257, 1310, 1643, 286, 6355, 670, 11, 50725], "temperature": 0.0, "avg_logprob": -0.2396322811351103, "compression_ratio": 1.6773162939297124, "no_speech_prob": 0.06969249993562698, "confidence": 0.981, "words": [{"text": "and", "start": 6.24, "end": 6.4, "confidence": 0.989, "speaker": "SPEAKER_00"}, {"text": "learn", "start": 6.4, "end": 6.56, "confidence": 0.995, "speaker": "SPEAKER_00"}, {"text": "a", "start": 6.56, "end": 6.68, "confidence": 0.998, "speaker": "SPEAKER_00"}, {"text": "little", "start": 6.68, "end": 6.78, "confidence": 0.991, "speaker": "SPEAKER_00"}, {"text": "bit", "start": 6.78, "end": 6.92, "confidence": 0.992, "speaker": "SPEAKER_00"}, {"text": "of", "start": 6.92, "end": 7.02, "confidence": 0.985, "speaker": "SPEAKER_00"}, {"text": "corporate", "start": 7.02, "end": 7.32, "confidence": 0.905, "speaker": "SPEAKER_00"}, {"text": "work,", "start": 7.32, "end": 7.54, "confidence": 0.994, "speaker": "SPEAKER_00"}], "speaker": "SPEAKER_00"}, {"id": 3, "seek": 0, "start": 7.58, "end": 8.7, "text": " because I don't know corporate yet.", "tokens": [50725, 780, 314, 836, 470, 760, 6355, 1865, 13, 50800], "temperature": 0.0, "avg_logprob": -0.2396322811351103, "compression_ratio": 1.6773162939297124, "no_speech_prob": 0.06969249993562698, "confidence": 0.899, "words": [{"text": "because", "start": 7.58, "end": 7.7, "confidence": 0.548, "speaker": "SPEAKER_00"}, {"text": "I", "start": 7.7, "end": 7.82, "confidence": 0.997, "speaker": "SPEAKER_00"}, {"text": "don't", "start": 7.82, "end": 7.96, "confidence": 0.997, "speaker": "SPEAKER_00"}, {"text": "know", "start": 7.96, "end": 8.08, "confidence": 0.988, "speaker": "SPEAKER_00"}, {"text": "corporate", "start": 8.08, "end": 8.4, "confidence": 
0.957, "speaker": "SPEAKER_00"}, {"text": "yet.", "start": 8.4, "end": 8.7, "confidence": 0.924, "speaker": "SPEAKER_00"}], "speaker": "SPEAKER_00"}, {"id": 4, "seek": 0, "start": 8.82, "end": 9.96, "text": " I only need a few months.", "tokens": [50800, 314, 691, 761, 257, 1178, 1933, 13, 50862], "temperature": 0.0, "avg_logprob": -0.2396322811351103, "compression_ratio": 1.6773162939297124, "no_speech_prob": 0.06969249993562698, "confidence": 0.953, "words": [{"text": "I", "start": 8.82, "end": 9.2, "confidence": 0.815, "speaker": "SPEAKER_01"}, {"text": "only", "start": 9.2, "end": 9.32, "confidence": 0.937, "speaker": "SPEAKER_00"}, {"text": "need", "start": 9.32, "end": 9.44, "confidence": 0.987, "speaker": "SPEAKER_00"}, {"text": "a", "start": 9.44, "end": 9.54, "confidence": 0.998, "speaker": "SPEAKER_00"}, {"text": "few", "start": 9.54, "end": 9.68, "confidence": 1.0, "speaker": "SPEAKER_00"}, {"text": "months.", "start": 9.68, "end": 9.96, "confidence": 0.998, "speaker": "SPEAKER_00"}], "speaker": "SPEAKER_00"}, {"id": 5, "seek": 0, "start": 10.34, "end": 11.42, "text": " You give me a few months to run the shit,", "tokens": [50862, 921, 1577, 502, 257, 1178, 1933, 284, 1057, 262, 7510, 11, 50920], "temperature": 0.0, "avg_logprob": -0.2396322811351103, "compression_ratio": 1.6773162939297124, "no_speech_prob": 0.06969249993562698, "confidence": 0.803, "words": [{"text": "You", "start": 10.34, "end": 10.48, "confidence": 0.699, "speaker": "SPEAKER_00"}, {"text": "give", "start": 10.48, "end": 10.54, "confidence": 0.946, "speaker": "SPEAKER_00"}, {"text": "me", "start": 10.54, "end": 10.64, "confidence": 0.995, "speaker": "SPEAKER_00"}, {"text": "a", "start": 10.64, "end": 10.72, "confidence": 0.995, "speaker": "SPEAKER_00"}, {"text": "few", "start": 10.72, "end": 10.84, "confidence": 0.998, "speaker": "SPEAKER_00"}, {"text": "months", "start": 10.84, "end": 11.02, "confidence": 0.994, "speaker": "SPEAKER_00"}, {"text": "to", "start": 11.02, "end": 11.14, 
"confidence": 0.728, "speaker": "SPEAKER_00"}, {"text": "run", "start": 11.14, "end": 11.24, "confidence": 0.763, "speaker": "SPEAKER_00"}, {"text": "the", "start": 11.24, "end": 11.34, "confidence": 0.399, "speaker": "SPEAKER_00"}, {"text": "shit,", "start": 11.34, "end": 11.42, "confidence": 0.778, "speaker": "SPEAKER_00"}], "speaker": "SPEAKER_00"}, {"id": 6, "seek": 0, "start": 11.42, "end": 12.88, "text": " I'm a fast learner.", "tokens": [50920, 314, 1101, 257, 3049, 22454, 1008, 13, 50997], "temperature": 0.0, "avg_logprob": -0.2396322811351103, "compression_ratio": 1.6773162939297124, "no_speech_prob": 0.06969249993562698, "confidence": 0.76, "words": [{"text": "I'm", "start": 11.42, "end": 11.66, "confidence": 0.78, "speaker": "SPEAKER_00"}, {"text": "a", "start": 11.66, "end": 11.74, "confidence": 0.391, "speaker": "SPEAKER_00"}, {"text": "fast", "start": 11.74, "end": 12.14, "confidence": 0.821, "speaker": "SPEAKER_00"}, {"text": "learner.", "start": 12.14, "end": 12.88, "confidence": 0.996, "speaker": "SPEAKER_00"}], "speaker": "SPEAKER_00"}, {"id": 7, "seek": 0, "start": 12.98, "end": 14.86, "text": " Go to Def Jam, get a job in a position,", "tokens": [50997, 1514, 284, 2896, 9986, 11, 651, 257, 1693, 287, 257, 2292, 11, 51109], "temperature": 0.0, "avg_logprob": -0.2396322811351103, "compression_ratio": 1.6773162939297124, "no_speech_prob": 0.06969249993562698, "confidence": 0.957, "words": [{"text": "Go", "start": 12.98, "end": 13.2, "confidence": 0.983, "speaker": "SPEAKER_00"}, {"text": "to", "start": 13.2, "end": 13.32, "confidence": 0.997, "speaker": "SPEAKER_00"}, {"text": "Def", "start": 13.32, "end": 13.54, "confidence": 0.987, "speaker": "SPEAKER_00"}, {"text": "Jam,", "start": 13.54, "end": 13.76, "confidence": 0.993, "speaker": "SPEAKER_00"}, {"text": "get", "start": 13.88, "end": 13.92, "confidence": 0.986, "speaker": "SPEAKER_00"}, {"text": "a", "start": 13.92, "end": 14.06, "confidence": 0.998, "speaker": "SPEAKER_00"}, {"text": "job", 
"start": 14.06, "end": 14.24, "confidence": 0.999, "speaker": "SPEAKER_00"}, {"text": "in", "start": 14.24, "end": 14.38, "confidence": 0.808, "speaker": "SPEAKER_00"}, {"text": "a", "start": 14.38, "end": 14.46, "confidence": 0.85, "speaker": "SPEAKER_00"}, {"text": "[*]", "start": 14.46, "end": 14.72, "confidence": 0.0, "speaker": "SPEAKER_00"}, {"text": "position,", "start": 14.72, "end": 14.86, "confidence": 0.994, "speaker": "SPEAKER_00"}], "speaker": "SPEAKER_00"}, {"id": 8, "seek": 0, "start": 15.24, "end": 17.02, "text": " drop a record, get Benny the Butcher signed,", "tokens": [51109, 4268, 257, 1700, 11, 651, 44275, 262, 39680, 4488, 11, 51199], "temperature": 0.0, "avg_logprob": -0.2396322811351103, "compression_ratio": 1.6773162939297124, "no_speech_prob": 0.06969249993562698, "confidence": 0.963, "words": [{"text": "drop", "start": 15.24, "end": 15.52, "confidence": 0.995, "speaker": "SPEAKER_00"}, {"text": "a", "start": 15.52, "end": 15.68, "confidence": 0.998, "speaker": "SPEAKER_00"}, {"text": "record,", "start": 15.68, "end": 15.9, "confidence": 0.999, "speaker": "SPEAKER_00"}, {"text": "get", "start": 16.02, "end": 16.12, "confidence": 0.961, "speaker": "SPEAKER_00"}, {"text": "Benny", "start": 16.12, "end": 16.32, "confidence": 0.97, "speaker": "SPEAKER_00"}, {"text": "the", "start": 16.32, "end": 16.46, "confidence": 0.92, "speaker": "SPEAKER_00"}, {"text": "Butcher", "start": 16.46, "end": 16.7, "confidence": 0.988, "speaker": "SPEAKER_00"}, {"text": "signed,", "start": 16.7, "end": 17.02, "confidence": 0.879, "speaker": "SPEAKER_00"}], "speaker": "SPEAKER_00"}, {"id": 9, "seek": 0, "start": 17.08, "end": 18.3, "text": " get Hip Hop Harry signed,", "tokens": [51199, 651, 29437, 9996, 5850, 4488, 11, 51292], "temperature": 0.0, "avg_logprob": -0.2396322811351103, "compression_ratio": 1.6773162939297124, "no_speech_prob": 0.06969249993562698, "confidence": 0.927, "words": [{"text": "get", "start": 17.08, "end": 17.26, "confidence": 0.992, 
"speaker": "SPEAKER_00"}, {"text": "Hip", "start": 17.26, "end": 17.44, "confidence": 0.924, "speaker": "SPEAKER_00"}, {"text": "Hop", "start": 17.44, "end": 17.62, "confidence": 0.76, "speaker": "SPEAKER_00"}, {"text": "Harry", "start": 17.62, "end": 17.88, "confidence": 0.997, "speaker": "SPEAKER_00"}, {"text": "signed,", "start": 17.88, "end": 18.3, "confidence": 0.985, "speaker": "SPEAKER_00"}], "speaker": "SPEAKER_00"}, {"id": 10, "seek": 0, "start": 18.9, "end": 20.32, "text": " learn a few tricks of the trade,", "tokens": [51292, 2193, 257, 1178, 15910, 286, 262, 3292, 11, 51367], "temperature": 0.0, "avg_logprob": -0.2396322811351103, "compression_ratio": 1.6773162939297124, "no_speech_prob": 0.06969249993562698, "confidence": 0.976, "words": [{"text": "learn", "start": 18.9, "end": 19.18, "confidence": 0.959, "speaker": "SPEAKER_00"}, {"text": "a", "start": 19.18, "end": 19.34, "confidence": 0.995, "speaker": "SPEAKER_00"}, {"text": "few", "start": 19.34, "end": 19.48, "confidence": 1.0, "speaker": "SPEAKER_00"}, {"text": "tricks", "start": 19.48, "end": 19.74, "confidence": 0.997, "speaker": "SPEAKER_00"}, {"text": "of", "start": 19.74, "end": 19.88, "confidence": 0.898, "speaker": "SPEAKER_00"}, {"text": "the", "start": 19.88, "end": 19.98, "confidence": 0.997, "speaker": "SPEAKER_00"}, {"text": "trade,", "start": 19.98, "end": 20.32, "confidence": 0.994, "speaker": "SPEAKER_00"}], "speaker": "SPEAKER_00"}, {"id": 11, "seek": 0, "start": 20.4, "end": 22.46, "text": " find out that the niggas that had it", "tokens": [51367, 1064, 503, 326, 262, 299, 6950, 292, 326, 550, 340, 51467], "temperature": 0.0, "avg_logprob": -0.2396322811351103, "compression_ratio": 1.6773162939297124, "no_speech_prob": 0.06969249993562698, "confidence": 0.978, "words": [{"text": "find", "start": 20.4, "end": 20.74, "confidence": 0.947, "speaker": "SPEAKER_00"}, {"text": "out", "start": 20.74, "end": 21.1, "confidence": 0.998, "speaker": "SPEAKER_00"}, {"text": "that", "start": 
21.1, "end": 21.58, "confidence": 0.981, "speaker": "SPEAKER_00"}, {"text": "the", "start": 21.58, "end": 21.7, "confidence": 0.994, "speaker": "SPEAKER_00"}, {"text": "niggas", "start": 21.7, "end": 21.96, "confidence": 0.978, "speaker": "SPEAKER_00"}, {"text": "that", "start": 21.96, "end": 22.12, "confidence": 0.945, "speaker": "SPEAKER_00"}, {"text": "had", "start": 22.12, "end": 22.32, "confidence": 0.995, "speaker": "SPEAKER_00"}, {"text": "it", "start": 22.32, "end": 22.46, "confidence": 0.993, "speaker": "SPEAKER_00"}], "speaker": "SPEAKER_00"}, {"id": 12, "seek": 0, "start": 22.46, "end": 23.72, "text": " that wanted me to hold for them,", "tokens": [51467, 326, 2227, 502, 284, 1745, 329, 606, 11, 51544], "temperature": 0.0, "avg_logprob": -0.2396322811351103, "compression_ratio": 1.6773162939297124, "no_speech_prob": 0.06969249993562698, "confidence": 0.935, "words": [{"text": "that", "start": 22.46, "end": 22.6, "confidence": 0.976, "speaker": "SPEAKER_00"}, {"text": "wanted", "start": 22.6, "end": 22.76, "confidence": 0.996, "speaker": "SPEAKER_00"}, {"text": "me", "start": 22.76, "end": 22.9, "confidence": 0.995, "speaker": "SPEAKER_00"}, {"text": "to", "start": 22.9, "end": 23.04, "confidence": 0.984, "speaker": "SPEAKER_00"}, {"text": "hold", "start": 23.04, "end": 23.2, "confidence": 0.856, "speaker": "SPEAKER_00"}, {"text": "for", "start": 23.2, "end": 23.42, "confidence": 0.99, "speaker": "SPEAKER_00"}, {"text": "them,", "start": 23.42, "end": 23.72, "confidence": 0.772, "speaker": "SPEAKER_00"}], "speaker": "SPEAKER_00"}, {"id": 13, "seek": 0, "start": 23.88, "end": 25.4, "text": " then sold it to some other people.", "tokens": [51544, 788, 2702, 340, 284, 617, 584, 661, 13, 51654], "temperature": 0.0, "avg_logprob": -0.2396322811351103, "compression_ratio": 1.6773162939297124, "no_speech_prob": 0.06969249993562698, "confidence": 0.987, "words": [{"text": "then", "start": 23.88, "end": 24.18, "confidence": 0.923, "speaker": "SPEAKER_00"}, 
{"text": "sold", "start": 24.18, "end": 24.46, "confidence": 0.997, "speaker": "SPEAKER_00"}, {"text": "it", "start": 24.46, "end": 24.58, "confidence": 0.996, "speaker": "SPEAKER_00"}, {"text": "to", "start": 24.58, "end": 24.72, "confidence": 0.998, "speaker": "SPEAKER_00"}, {"text": "some", "start": 24.72, "end": 24.88, "confidence": 0.998, "speaker": "SPEAKER_00"}, {"text": "other", "start": 24.88, "end": 25.1, "confidence": 0.999, "speaker": "SPEAKER_00"}, {"text": "people.", "start": 25.1, "end": 25.4, "confidence": 1.0, "speaker": "SPEAKER_00"}], "speaker": "SPEAKER_00"}, {"id": 14, "seek": 0, "start": 26.14, "end": 28.69, "text": " So now, one of my big wig buddies called me", "tokens": [51654, 1406, 783, 11, 530, 286, 616, 1263, 45678, 35548, 1444, 502, 51779], "temperature": 0.0, "avg_logprob": -0.2396322811351103, "compression_ratio": 1.6773162939297124, "no_speech_prob": 0.06969249993562698, "confidence": 0.923, "words": [{"text": "So", "start": 26.14, "end": 26.4, "confidence": 0.991, "speaker": "SPEAKER_00"}, {"text": "now,", "start": 26.4, "end": 26.76, "confidence": 0.83, "speaker": "SPEAKER_00"}, {"text": "one", "start": 27.12, "end": 27.26, "confidence": 0.993, "speaker": "SPEAKER_00"}, {"text": "of", "start": 27.26, "end": 27.36, "confidence": 0.998, "speaker": "SPEAKER_00"}, {"text": "my", "start": 27.36, "end": 27.52, "confidence": 0.998, "speaker": "SPEAKER_00"}, {"text": "big", "start": 27.52, "end": 27.78, "confidence": 0.92, "speaker": "SPEAKER_00"}, {"text": "wig", "start": 27.78, "end": 28.0, "confidence": 0.715, "speaker": "SPEAKER_00"}, {"text": "buddies", "start": 28.0, "end": 28.26, "confidence": 0.999, "speaker": "SPEAKER_00"}, {"text": "called", "start": 28.26, "end": 28.54, "confidence": 0.841, "speaker": "SPEAKER_00"}, {"text": "me", "start": 28.54, "end": 28.69, "confidence": 0.997, "speaker": "SPEAKER_00"}], "speaker": "SPEAKER_00"}, {"id": 15, "seek": 2832, "start": 28.69, "end": 31.38, "text": " and said, hey dog, I know the 
people that got Def Ro.", "tokens": [50383, 290, 531, 11, 17207, 3290, 11, 314, 760, 262, 661, 326, 1392, 2896, 5564, 13, 50550], "temperature": 0.0, "avg_logprob": -0.23926071048707, "compression_ratio": 1.7264573991031391, "no_speech_prob": 0.014520817436277866, "confidence": 0.713, "words": [{"text": "and", "start": 28.69, "end": 28.84, "confidence": 0.979, "speaker": "SPEAKER_00"}, {"text": "said,", "start": 28.84, "end": 28.98, "confidence": 0.765, "speaker": "SPEAKER_00"}, {"text": "hey", "start": 29.1, "end": 29.18, "confidence": 0.816, "speaker": "SPEAKER_00"}, {"text": "dog,", "start": 29.18, "end": 29.48, "confidence": 0.455, "speaker": "SPEAKER_00"}, {"text": "I", "start": 29.94, "end": 30.04, "confidence": 0.94, "speaker": "SPEAKER_00"}, {"text": "know", "start": 30.04, "end": 30.18, "confidence": 0.998, "speaker": "SPEAKER_00"}, {"text": "the", "start": 30.18, "end": 30.32, "confidence": 0.992, "speaker": "SPEAKER_00"}, {"text": "people", "start": 30.32, "end": 30.54, "confidence": 0.999, "speaker": "SPEAKER_00"}, {"text": "that", "start": 30.54, "end": 30.7, "confidence": 0.845, "speaker": "SPEAKER_00"}, {"text": "got", "start": 30.7, "end": 30.92, "confidence": 0.982, "speaker": "SPEAKER_00"}, {"text": "Def", "start": 30.92, "end": 31.18, "confidence": 0.362, "speaker": "SPEAKER_00"}, {"text": "Ro.", "start": 31.18, "end": 31.38, "confidence": 0.224, "speaker": "SPEAKER_00"}], "speaker": "SPEAKER_00"}, {"id": 16, "seek": 2832, "start": 32.02, "end": 33.44, "text": " And they don't know what to do with it.", "tokens": [50550, 843, 484, 836, 470, 760, 644, 284, 466, 351, 340, 13, 50668], "temperature": 0.0, "avg_logprob": -0.23926071048707, "compression_ratio": 1.7264573991031391, "no_speech_prob": 0.014520817436277866, "confidence": 0.992, "words": [{"text": "And", "start": 32.02, "end": 32.2, "confidence": 0.977, "speaker": "SPEAKER_00"}, {"text": "they", "start": 32.2, "end": 32.3, "confidence": 0.987, "speaker": "SPEAKER_00"}, {"text": "don't", 
"start": 32.3, "end": 32.46, "confidence": 0.998, "speaker": "SPEAKER_00"}, {"text": "know", "start": 32.46, "end": 32.58, "confidence": 0.996, "speaker": "SPEAKER_00"}, {"text": "what", "start": 32.58, "end": 32.7, "confidence": 0.998, "speaker": "SPEAKER_00"}, {"text": "to", "start": 32.7, "end": 32.84, "confidence": 0.995, "speaker": "SPEAKER_00"}, {"text": "do", "start": 32.84, "end": 33.02, "confidence": 0.995, "speaker": "SPEAKER_00"}, {"text": "with", "start": 33.02, "end": 33.18, "confidence": 0.995, "speaker": "SPEAKER_00"}, {"text": "it.", "start": 33.18, "end": 33.44, "confidence": 0.985}], "speaker": "SPEAKER_00"}, {"id": 17, "seek": 2832, "start": 34.22, "end": 36.24, "text": " Let me holler at them, I know just what to do with it.", "tokens": [50668, 3914, 502, 289, 49252, 379, 606, 11, 314, 760, 655, 644, 284, 466, 351, 340, 13, 50760], "temperature": 0.0, "avg_logprob": -0.23926071048707, "compression_ratio": 1.7264573991031391, "no_speech_prob": 0.014520817436277866, "confidence": 0.928, "words": [{"text": "Let", "start": 34.22, "end": 34.56, "confidence": 0.848, "speaker": "SPEAKER_02"}, {"text": "me", "start": 34.56, "end": 34.7, "confidence": 0.998, "speaker": "SPEAKER_00"}, {"text": "holler", "start": 34.7, "end": 34.9, "confidence": 0.827, "speaker": "SPEAKER_00"}, {"text": "at", "start": 34.9, "end": 35.08, "confidence": 0.993, "speaker": "SPEAKER_00"}, {"text": "them,", "start": 35.08, "end": 35.18, "confidence": 0.843, "speaker": "SPEAKER_00"}, {"text": "I", "start": 35.24, "end": 35.3, "confidence": 0.854, "speaker": "SPEAKER_00"}, {"text": "know", "start": 35.3, "end": 35.48, "confidence": 0.963, "speaker": "SPEAKER_00"}, {"text": "just", "start": 35.48, "end": 35.68, "confidence": 0.995, "speaker": "SPEAKER_00"}, {"text": "what", "start": 35.68, "end": 35.86, "confidence": 0.994, "speaker": "SPEAKER_00"}, {"text": "to", "start": 35.86, "end": 35.98, "confidence": 0.988, "speaker": "SPEAKER_00"}, {"text": "do", "start": 35.98, "end": 
36.1, "confidence": 0.996, "speaker": "SPEAKER_00"}, {"text": "with", "start": 36.1, "end": 36.2, "confidence": 0.987, "speaker": "SPEAKER_00"}, {"text": "it.", "start": 36.2, "end": 36.24, "confidence": 0.916, "speaker": "SPEAKER_00"}], "speaker": "SPEAKER_00"}, {"id": 18, "seek": 2832, "start": 36.24, "end": 39.68, "text": " So I hit them and like, let me work for y'all.", "tokens": [50760, 1406, 314, 2277, 606, 290, 588, 11, 1309, 502, 670, 329, 331, 6, 439, 13, 50937], "temperature": 0.0, "avg_logprob": -0.23926071048707, "compression_ratio": 1.7264573991031391, "no_speech_prob": 0.014520817436277866, "confidence": 0.833, "words": [{"text": "So", "start": 36.24, "end": 36.4, "confidence": 0.998, "speaker": "SPEAKER_00"}, {"text": "I", "start": 36.4, "end": 36.52, "confidence": 0.9, "speaker": "SPEAKER_00"}, {"text": "hit", "start": 36.52, "end": 36.66, "confidence": 0.987, "speaker": "SPEAKER_00"}, {"text": "them", "start": 36.66, "end": 36.82, "confidence": 0.995, "speaker": "SPEAKER_00"}, {"text": "and", "start": 36.82, "end": 36.98, "confidence": 0.486, "speaker": "SPEAKER_00"}, {"text": "like,", "start": 36.98, "end": 37.2, "confidence": 0.514, "speaker": "SPEAKER_00"}, {"text": "let", "start": 37.56, "end": 37.68, "confidence": 0.801, "speaker": "SPEAKER_00"}, {"text": "me", "start": 37.68, "end": 37.88, "confidence": 0.997, "speaker": "SPEAKER_00"}, {"text": "[*]", "start": 37.88, "end": 39.1, "confidence": 0.0, "speaker": "SPEAKER_00"}, {"text": "work", "start": 39.1, "end": 39.24, "confidence": 0.555, "speaker": "SPEAKER_00"}, {"text": "for", "start": 39.24, "end": 39.44, "confidence": 0.996, "speaker": "SPEAKER_00"}, {"text": "y'all.", "start": 39.44, "end": 39.68, "confidence": 0.986, "speaker": "SPEAKER_00"}], "speaker": "SPEAKER_00"}, {"id": 19, "seek": 2832, "start": 40.76, "end": 43.32, "text": " The play was cool, but it's like, yeah, fuck that.", "tokens": [50987, 383, 711, 373, 3608, 11, 475, 340, 338, 588, 11, 10194, 11, 5089, 326, 13, 51115], 
"temperature": 0.0, "avg_logprob": -0.23926071048707, "compression_ratio": 1.7264573991031391, "no_speech_prob": 0.014520817436277866, "confidence": 0.85, "words": [{"text": "The", "start": 40.76, "end": 40.9, "confidence": 0.989, "speaker": "SPEAKER_00"}, {"text": "play", "start": 40.9, "end": 41.1, "confidence": 0.846, "speaker": "SPEAKER_00"}, {"text": "was", "start": 41.1, "end": 41.28, "confidence": 0.999, "speaker": "SPEAKER_00"}, {"text": "cool,", "start": 41.28, "end": 41.56, "confidence": 0.997, "speaker": "SPEAKER_00"}, {"text": "but", "start": 41.66, "end": 41.76, "confidence": 0.977, "speaker": "SPEAKER_00"}, {"text": "it's", "start": 41.76, "end": 41.96, "confidence": 0.947, "speaker": "SPEAKER_00"}, {"text": "like,", "start": 41.96, "end": 42.2, "confidence": 0.997, "speaker": "SPEAKER_00"}, {"text": "[*]", "start": 42.32, "end": 42.48, "confidence": 0.0, "speaker": "SPEAKER_00"}, {"text": "yeah,", "start": 42.48, "end": 42.7, "confidence": 0.243, "speaker": "SPEAKER_00"}, {"text": "fuck", "start": 42.94, "end": 43.1, "confidence": 0.951, "speaker": "SPEAKER_00"}, {"text": "that.", "start": 43.1, "end": 43.32, "confidence": 0.997, "speaker": "SPEAKER_00"}], "speaker": "SPEAKER_00"}, {"id": 20, "seek": 2832, "start": 43.34, "end": 44.83, "text": " How much to buy this shit?", "tokens": [51115, 1374, 881, 284, 2822, 428, 7510, 30, 51197], "temperature": 0.0, "avg_logprob": -0.23926071048707, "compression_ratio": 1.7264573991031391, "no_speech_prob": 0.014520817436277866, "confidence": 0.954, "words": [{"text": "How", "start": 43.34, "end": 43.48, "confidence": 0.988, "speaker": "SPEAKER_00"}, {"text": "[*]", "start": 43.48, "end": 44.0, "confidence": 0.0, "speaker": "SPEAKER_00"}, {"text": "much", "start": 44.0, "end": 44.12, "confidence": 0.998, "speaker": "SPEAKER_00"}, {"text": "to", "start": 44.12, "end": 44.26, "confidence": 0.922, "speaker": "SPEAKER_00"}, {"text": "buy", "start": 44.26, "end": 44.46, "confidence": 0.995, "speaker": "SPEAKER_00"}, 
{"text": "this", "start": 44.46, "end": 44.64, "confidence": 0.833, "speaker": "SPEAKER_00"}, {"text": "shit?", "start": 44.64, "end": 44.83, "confidence": 0.998, "speaker": "SPEAKER_00"}], "speaker": "SPEAKER_00"}, {"id": 21, "seek": 2832, "start": 44.83, "end": 45.64, "text": " What you talkin' about?", "tokens": [51197, 1867, 345, 1561, 259, 6, 546, 30, 51259], "temperature": 0.0, "avg_logprob": -0.23926071048707, "compression_ratio": 1.7264573991031391, "no_speech_prob": 0.014520817436277866, "confidence": 0.802, "words": [{"text": "What", "start": 44.83, "end": 44.96, "confidence": 0.895, "speaker": "SPEAKER_00"}, {"text": "you", "start": 44.96, "end": 45.06, "confidence": 0.594, "speaker": "SPEAKER_00"}, {"text": "talkin'", "start": 45.06, "end": 45.46, "confidence": 0.862, "speaker": "SPEAKER_00"}, {"text": "about?", "start": 45.46, "end": 45.64, "confidence": 0.785}], "speaker": "SPEAKER_00"}, {"id": 22, "seek": 2832, "start": 45.98, "end": 47.38, "text": " How much to buy Def Ro first?", "tokens": [51259, 1374, 881, 284, 2822, 2896, 5564, 717, 30, 51359], "temperature": 0.0, "avg_logprob": -0.23926071048707, "compression_ratio": 1.7264573991031391, "no_speech_prob": 0.014520817436277866, "confidence": 0.959, "words": [{"text": "How", "start": 45.98, "end": 46.18, "confidence": 0.996, "speaker": "SPEAKER_00"}, {"text": "much", "start": 46.18, "end": 46.32, "confidence": 0.999, "speaker": "SPEAKER_00"}, {"text": "to", "start": 46.32, "end": 46.46, "confidence": 0.989, "speaker": "SPEAKER_00"}, {"text": "buy", "start": 46.46, "end": 46.64, "confidence": 0.998, "speaker": "SPEAKER_00"}, {"text": "Def", "start": 46.64, "end": 46.88, "confidence": 0.833, "speaker": "SPEAKER_00"}, {"text": "Ro", "start": 46.88, "end": 47.08, "confidence": 0.996, "speaker": "SPEAKER_00"}, {"text": "first?", "start": 47.08, "end": 47.38, "confidence": 0.913, "speaker": "SPEAKER_00"}], "speaker": "SPEAKER_00"}, {"id": 23, "seek": 2832, "start": 48.14, "end": 49.22, "text": " How 
much for my masters?", "tokens": [51359, 1374, 881, 329, 616, 18159, 30, 51462], "temperature": 0.0, "avg_logprob": -0.23926071048707, "compression_ratio": 1.7264573991031391, "no_speech_prob": 0.014520817436277866, "confidence": 0.931, "words": [{"text": "How", "start": 48.14, "end": 48.34, "confidence": 0.999, "speaker": "SPEAKER_00"}, {"text": "much", "start": 48.34, "end": 48.48, "confidence": 0.999, "speaker": "SPEAKER_00"}, {"text": "for", "start": 48.48, "end": 48.62, "confidence": 0.991, "speaker": "SPEAKER_00"}, {"text": "my", "start": 48.62, "end": 48.8, "confidence": 0.997, "speaker": "SPEAKER_00"}, {"text": "[*]", "start": 48.8, "end": 49.06, "confidence": 0.0, "speaker": "SPEAKER_00"}, {"text": "masters?", "start": 49.06, "end": 49.22, "confidence": 0.71, "speaker": "SPEAKER_00"}], "speaker": "SPEAKER_00"}, {"id": 24, "seek": 2832, "start": 50.18, "end": 51.48, "text": " How much for all of the masters?", "tokens": [51462, 1374, 881, 329, 477, 286, 262, 18159, 30, 51542], "temperature": 0.0, "avg_logprob": -0.23926071048707, "compression_ratio": 1.7264573991031391, "no_speech_prob": 0.014520817436277866, "confidence": 0.982, "words": [{"text": "How", "start": 50.18, "end": 50.4, "confidence": 0.998, "speaker": "SPEAKER_00"}, {"text": "much", "start": 50.4, "end": 50.54, "confidence": 0.999, "speaker": "SPEAKER_00"}, {"text": "for", "start": 50.54, "end": 50.7, "confidence": 0.998, "speaker": "SPEAKER_00"}, {"text": "all", "start": 50.7, "end": 50.88, "confidence": 0.999, "speaker": "SPEAKER_00"}, {"text": "of", "start": 50.88, "end": 50.98, "confidence": 0.971, "speaker": "SPEAKER_00"}, {"text": "the", "start": 50.98, "end": 51.08, "confidence": 0.919, "speaker": "SPEAKER_00"}, {"text": "masters?", "start": 51.08, "end": 51.48, "confidence": 0.994, "speaker": "SPEAKER_00"}], "speaker": "SPEAKER_00"}], "language_code": "en"}
\ No newline at end of file
diff --git a/tests/test_formatter.py b/tests/test_formatter.py
index afaeae4..0509e57 100644
--- a/tests/test_formatter.py
+++ b/tests/test_formatter.py
@@ -1,6 +1,9 @@
+import math
 import json
 import numpy.testing as nptest
+import pytest
 import textgrid
+import warnings
 
 import formatter
 
@@ -8,20 +11,34 @@ class TestFormatter():
     Format = formatter.Formatter()
 
     def test_to_TextGrid(self):
-        for input_fname, ex_fname in self.provide_to_TextGrid():
+        for input_fname, by_phrase in self.provide_to_TextGrid():
             with open(input_fname) as f:
                 case = json.load(f)
-            observed = self.Format.to_TextGrid(case)
+            observed = self.Format.to_TextGrid(case, by_phrase=by_phrase)
             
-            expected = textgrid.TextGrid()
-            expected.read(ex_fname)
+            assert observed.maxTime is not None
+            assert len(observed.tiers) > 0
 
-            nptest.assert_array_equal(observed,expected)
+    def test_no_speaker_warning(self):  # each provided case must emit the "No speaker for segment" UserWarning
+        for input_fname in self.provide_no_speaker_warning():
+            with open(input_fname) as f:
+                case = json.load(f)
+            with pytest.warns(UserWarning, match="No speaker for segment"):
+                self.Format.to_TextGrid(case, by_phrase=False)  # return value is irrelevant here
 
     def provide_to_TextGrid(self):
         return [
                 (
-                    'tests/data/TestAudio_SnoopDogg_85SouthMedia_segments.json',
-                    'tests/data/TestAudio_SnoopDogg_85SouthMedia.TextGrid'
+                    'tests/data/TestAudio_SnoopDogg_85SouthMedia_WhisperTimestampSegments.json',
+                    True
+                ),
+                (
+                    'tests/data/TestAudio_SnoopDogg_85SouthMedia_WhisperTimestampSegments.json',
+                    False
                 ),
             ]
+
+    def provide_no_speaker_warning(self):
+        return [
+                'tests/data/TestAudio_SnoopDogg_85SouthMedia.json',
+            ]