forked from sanyaade-machine-learning/Transana
-
Notifications
You must be signed in to change notification settings - Fork 0
/
TranscriptEditor_RTC.py
3943 lines (3457 loc) · 206 KB
/
TranscriptEditor_RTC.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# Copyright (C) 2003 - 2015 The Board of Regents of the University of Wisconsin System
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
"""This module implements the TranscriptEditor class using the wxRichTextCtrl as part of the Transcript Editor
component. """
__author__ = 'David Woods <[email protected]>'

# Module-level diagnostic switches.  Each one announces itself on stdout at
# import time when it is turned on, so a stray "True" is hard to overlook.
DEBUG = False
if DEBUG:
    print("TranscriptEditor_RTC DEBUG is ON.")

SHOWHIDDEN = False
if SHOWHIDDEN:
    print("TranscriptEditor_RTC SHOWHIDDEN is ON.")
# Import wxPython
import wx
# Import the Rich Text Control
from RichTextEditCtrl_RTC import RichTextEditCtrl
# import wxPython's RichTextCtrl
import wx.richtext as richtext
# Import the Format Dialog
import FormatDialog
# Import the Transana Transcript Object
import Transcript
# Import the Transana Drag and Drop infrastructure
import DragAndDropObjects
# Import Transana's Dialogs
import Dialogs
# import Transana's Document object
import Document
# Import Transana's Episode and Clip Objects
import Episode, Clip
# Import Transana's Quote object
import Quote
# Import Transana's Constants
import TransanaConstants
# import Transana Exceptions
import TransanaExceptions
# Import Transana's Global variables
import TransanaGlobal
# import the RTC version of the Transcript User Interface module
import TranscriptionUI_RTC
# import TextReport module
import TextReport
# Import Transana's Miscellaneous functions
import Misc
# Import Python's Regular Expression handler
import re
# Import Python's cPickle module
import cPickle
# Import Python's pickle module
import pickle
# import fast string IO handling
import cStringIO
# Import Python's os module
import os
# Import Python's string module
import string
# Import Python's sys module
import sys
# Import Python's types module
import types
# This character is interpreted as a timecode marker in transcripts
TIMECODE_CHAR = TransanaConstants.TIMECODE_CHAR

# Regular expression matching one time code: the marker character followed by
# "<", zero or more digits, and ">".
#
# History: an earlier pattern, "\xA4<[^<]*>", was too greedy.  Given the string
# "this \xA4<1234> is a string > with many characters" it matched
# "\xA4<1234> is a string >" rather than the desired "\xA4<1234>".
# Restricting the payload to digits avoids that.
#
# The pattern is written as a raw string so that the backslash in "\d" reaches
# the regular expression engine untouched instead of relying on Python leaving
# an unrecognized string escape alone.
TIMECODE_REGEXP = r"%s<[\d]*>" % TIMECODE_CHAR
class TranscriptEditor(RichTextEditCtrl):
"""This class is a word processor for transcribing and editing. It
provides only the actual text editing control, without any external GUI
components to aid editing (such as a toolbar)."""
def __init__(self, parent, id=-1, stylechange_cb=None, updateSelectionText=False, pos=None, suppressGDIWarning = False):
    """Initialize a TranscriptEditor object.

    Parameters:
      parent               parent window; passed to RichTextEditCtrl and kept as self.parent
      id                   accepted for signature compatibility; not used by this method
      stylechange_cb       stored as self.StyleChanged
      updateSelectionText  stored as self.updateSelectionText
      pos                  passed through to RichTextEditCtrl.__init__
      suppressGDIWarning   passed through to RichTextEditCtrl.__init__

    The editor starts out in read-only mode with no transcript loaded. """
    # Initialize with the RTC RichTextEditCtrl object
    RichTextEditCtrl.__init__(self, parent, pos = pos, suppressGDIWarning = suppressGDIWarning)

    # Remember initialization parameters
    self.parent = parent
    self.StyleChanged = stylechange_cb
    self.updateSelectionText = updateSelectionText

    # There are times related to right-click play control when we need to remember the cursor position.
    # Create a variable to store that information, initialized to 0
    self.cursorPosition = 0
    # Track the transcript length, required for altering Quote Positions while editing Documents
    self.documentLength = 0
    # Initialize CanDrag to False.  If Drag is allowed, it will be enabled later
    self.canDrag = False

    # Compiled Regular Expressions that can be used to find Time Codes
    self.HIDDEN_REGEXPS = [re.compile(TIMECODE_REGEXP),]
    # Indicate whether Time Code Symbols are shown, default to NOT
    self.codes_vis = 0
    # Indicate whether Time Code Data is shown, default to NOT
    self.timeCodeDataVisible = False
    # The Transcript Object held in this Transcript Editor (none yet)
    self.TranscriptObj = None
    # For Partial Transcript Loading, track the number of lines loaded in the Text Control
    self.LinesLoaded = 0
    # The list of Time Codes found in the transcript, initially empty
    self.timecodes = []
    # The current time code; -1 means DOES NOT EXIST
    self.current_timecode = -1

    # Create the AutoSave Timer and route its timer event to OnAutoSave
    self.autoSaveTimer = wx.Timer()
    self.autoSaveTimer.Bind(wx.EVT_TIMER, self.OnAutoSave)

    # We should start out in Read Only mode so that we get Word Tracking
    self.set_read_only(True)
    # Initialize Mouse Position to avoid problems later
    self.mousePosition = None

    # Make the Transcript Editor a Drop Target
    dt = TranscriptEditorDropTarget(self)
    self.SetDropTarget(dt)

    # NOTE:  EVT_KEY_DOWN (OnKeyDown), EVT_CHAR (OnKey), EVT_LEFT_DOWN (OnLeftDown),
    #        EVT_LEFT_UP (OnLeftUp) and EVT_MOTION (OnMotion) are deliberately NOT bound
    #        here.  Those bindings were removed to prevent DUPLICATE CALLS to the handlers.

    # We do need to catch EVT_KEY_UP.  Bind() is used rather than the legacy
    # wx.EVT_KEY_UP(window, handler) call form, so that every binding in this
    # method is made the same way.
    self.Bind(wx.EVT_KEY_UP, self.OnKeyUp)

    # Track Document edits, specifically to manage Quote Positions
    self.Bind(richtext.EVT_RICHTEXT_CONTENT_INSERTED, self.OnContentChanged)
    self.Bind(richtext.EVT_RICHTEXT_CONTENT_DELETED, self.OnContentChanged)

    # This causes the Transana Transcript Window to override the default
    # RichTextEditCtrl right-click menu.  Transana needs the right-click
    # for play control rather than an editing menu.
    self.Bind(wx.EVT_RIGHT_DOWN, self.OnRightDown)
    self.Bind(wx.EVT_RIGHT_UP, self.OnRightUp)

    self.Bind(wx.EVT_ACTIVATE, self.OnActivate)
def OnActivate(self, event):
    """ Handler for wx.EVT_ACTIVATE.  Optionally logs the call, then lets the event propagate. """
    # Only report the call when the module's DEBUG flag is on, so that normal
    # runs do not write a line to stdout every time the event fires.
    if DEBUG:
        print("TranscriptEditor.OnActivate called")
    # Allow other handlers to process the event as well
    event.Skip()
# Public methods
def load_transcript(self, transcript):
    """ Load the given transcript object or RTF file name into the editor.

    Parameters:
      transcript   either a string (treated as the name of a file to load) or an
                   object with a "text" attribute.  The start of that text selects
                   the handling:  'txt\\n' (plain text), '<?xml' (XML), 'rtf' at
                   positions 2-4 (Rich Text Format), empty or
                   '<(transcript-less clip)>' (no text at all); anything else is
                   treated as pickled data from the older STC-based editor and is
                   converted to RTF first.

    Plain text, RTF and converted pickle data are saved again right after loading.
    Partial Transcript Editing is switched off for the duration of the load and
    restored afterwards.  The editor is left in read-only mode. """
    # Remember Partial Transcript Editing status
    tmpPartialTranscriptEdit = TransanaConstants.partialTranscriptEdit
    # Temporarily turn partial transcript editing off
    TransanaConstants.partialTranscriptEdit = False

    # Create a popup telling the user about the load (needed for large files)
    loadDlg = Dialogs.PopupDialog(None, _("Loading..."), _("Loading your transcript.\nPlease wait...."))

    # Freeze the control to speed transcript load / RTF Import times
    self.Freeze()
    # Suppress Undo tracking
    self.BeginSuppressUndo()
    # Prepare the buffer for the incoming data.
    self.ClearDoc()
    # The control needs to be editable to insert a transcript!
    self.set_read_only(False)

    # The Transcript should already have been saved or cleared by this point.
    # This code should never get activated.  It's just here for safety's sake.
    if self.TranscriptObj:
        # Save the transcript
        if TransanaConstants.partialTranscriptEdit:
            self.parent.ControlObject.SaveTranscript(1, continueEditing=False)
        else:
            self.parent.ControlObject.SaveTranscript(1)
        # If you have the Transcript locked, then load something else without leaving
        # Edit Mode, you need to unlock the record!
        if self.TranscriptObj.isLocked:
            self.TranscriptObj.unlock_record()

    # Disable widget while loading transcript
    self.Enable(False)

    # Let's figure out what sort of transcript we have.
    # If a STRING is passed in, we probably have a file name, not a Transcript Object.
    if isinstance(transcript, types.StringTypes):
        dataType = 'filename'
    # If we have a TEXT file ...
    elif transcript.text[:4] == 'txt\n':
        dataType = 'text'
    # If we have a transcript in XML format ...
    elif transcript.text[:5] == '<?xml':
        dataType = 'xml'
    # If we have a transcript in Rich Text Format ...
    elif transcript.text[2:5] == u'rtf':
        dataType = 'rtf'
    # If we are creating a Transcript-less Clip ...
    elif (transcript.text == '') or transcript.text[0:24] == u'<(transcript-less clip)>':
        dataType = 'transcript-less clip'
    # Otherwise, we probably have a Styled Text Ctrl object that's been pickled (Transana 2.42 and earlier)
    else:
        dataType = 'pickle'

    # If we are dealing with a Plain Text document ...
    if dataType == 'text':
        # This branch is only reached when the text starts with the 'txt\n'
        # indicator, so strip those four characters.
        text = transcript.text[4:]

        # Scan the text for characters we need to handle.
        # NOTE:  This is a very preliminary implementation.  It only deals with English, and only with
        #        ASCII or UTF-8 encoding of time codes (chr(164) or chr(194) + chr(164)).
        # The cleaned characters are collected in a list and joined once, rather
        # than rebuilding the whole string for every replaced character.
        cleaned = []
        textLen = len(text)
        pos = 0
        while pos < textLen:
            ch = text[pos]
            code = ord(ch)
            # Plain ASCII, and the Time Code character chr(164), pass through unchanged
            if (code <= 127) or (code == 164):
                cleaned.append(ch)
            # In UTF-8 Encoding, the time code is a PAIR, chr(194) + chr(164).  Drop the chr(194);
            # the chr(164) is kept on the next pass.  The bounds check keeps a chr(194) that is the
            # very LAST character from raising an IndexError; it falls through to the "?" case.
            elif (code == 194) and (pos + 1 < textLen) and (ord(text[pos + 1]) == 164):
                pass
            # Any other non-ASCII character (> 127) is replaced with a question mark for the moment
            else:
                cleaned.append('?')
            pos += 1
        text = ''.join(cleaned)

        # As long as there is text to process ...
        while len(text) > 0:
            # Look for the next Time Code
            tcPos = text.find(chr(164))
            if tcPos > -1:
                # Isolate the chunk of text before the time code
                chunk = text[:tcPos]
                # Remove that chunk, the time code character and the opening bracket ("<")
                text = text[tcPos + 2:]
                # The text up to the closing bracket (">") is the time code data.
                timeval = text[:text.find('>')]
                # Remove the time code data and the closing bracket from the remaining text
                text = text[text.find('>')+1:]
                # Add the text chunk to the Transcript
                self.WriteText(chunk)
                # Add the time code (with data) to the Transcript
                self.insert_timecode(int(timeval))
            # If there are no more time codes in the text ...
            else:
                # ... add the rest of the text to the Transcript ...
                self.WriteText(text)
                # ... and clear the text variable to signal that we're done.
                text = ''

        # Set the Transcript to the Editor's TranscriptObj
        self.TranscriptObj = transcript
        # Indicate the Transcript hasn't been edited yet.
        self.TranscriptObj.has_changed = 0
        # A transcript in TXT form is saved immediately upon loading, to convert it.
        self.TranscriptObj.lock_record()
        if TransanaConstants.partialTranscriptEdit:
            self.save_transcript(continueEditing=False)
        else:
            self.save_transcript()
        self.TranscriptObj.unlock_record()

    # If we have an XML document, let's assume it's XML from Transana
    elif dataType == 'xml':
        # Load the XML Data held in the transcript's text field
        self.LoadXMLData(transcript.text)
        # The transcript that was passed in is our Transcript Object
        self.TranscriptObj = transcript
        # Initialize that the transcript has not yet changed.
        self.TranscriptObj.has_changed = 0

    # If we have a wxSTC-style pickled transcript object ...
    elif dataType == 'pickle':
        # Import the STC Transcript Editor
        import TranscriptEditor_STC
        # Create an invisible STC-based Transcript Editor
        invisibleSTC = TranscriptEditor_STC.TranscriptEditor(self.parent)
        invisibleSTC.Show(False)
        # Load the STC-style pickled data into the STC Editor
        invisibleSTC.load_transcript(transcript, 'pickle')
        # Convert the STC contents to Rich Text Format.  The RTF is loaded by the
        # 'filename' / 'rtf' / 'pickle' section below.
        transcript.text = invisibleSTC.GetRTFBuffer()
        # Destroy the STC-based Editor
        invisibleSTC.Destroy()

    # If we have a transcript-less Clip ...
    elif dataType == 'transcript-less clip':
        # ... then it should have no Transcript Text !!!
        transcript.text = ''
        # The transcript that was passed in is our Transcript Object
        self.TranscriptObj = transcript
        # Initialize that the transcript has not yet changed.
        self.TranscriptObj.has_changed = 0

    # THIS SHOULD NOT BE AN ELIF.  (Pickle data is converted above and then loaded here.)
    # If we have a filename, RichTextFormat, or a pickle that has just been converted to RTF ...
    if dataType in ['filename', 'rtf', 'pickle']:
        # Was the given transcript object simply a filename?
        # NOTE:  This should NEVER occur within Transana!
        if isinstance(transcript, types.StringTypes):
            # Load the document
            self.LoadDocument(transcript)
            # We don't have a Transana Transcript Object in this case.
            self.TranscriptObj = None
        # Is the given transcript a Transcript Object?
        else:
            # Destroy the Load Popup Dialog while the RTF Data processor runs
            loadDlg.Destroy()
            # Load the Transcript Text using the RTF Data processor
            self.LoadRTFData(transcript.text)
            # The transcript that was passed in is our Transcript Object
            self.TranscriptObj = transcript
            # Initialize that the transcript has not yet changed.
            self.TranscriptObj.has_changed = 0
            # Re-create the popup telling the user about the load (needed for large files)
            loadDlg = Dialogs.PopupDialog(None, _("Loading..."), _("Loading your transcript.\nPlease wait...."))

        # Hide the Time Code Data, which may be visible for RTF and TXT data
        self.HideTimeCodeData()

        # A Transcript in RTF Form is saved upon loading, to convert it.  This is best-effort:
        # a failure is reported on stdout and the load continues.  (On the filename path there
        # is no TranscriptObj, so the attempt fails and is reported.)
        try:
            self.TranscriptObj.lock_record()
            # If Partial Transcript Editing is enabled ...
            if TransanaConstants.partialTranscriptEdit:
                self.save_transcript(continueEditing=False)
            # If Partial Transcript Editing is NOT enabled ...
            else:
                self.save_transcript()
            # Unlock the transcript
            self.TranscriptObj.unlock_record()
        except Exception:
            # Note the failure in the Error Log
            print("TranscriptEditor_RTC.load_transcript():  SAVE AFTER CONVERSION FAILED.")
            print(sys.exc_info()[0])
            print(sys.exc_info()[1])
            import traceback
            traceback.print_exc(file=sys.stdout)
            print("")

    # Restore Partial Transcript Editing status
    TransanaConstants.partialTranscriptEdit = tmpPartialTranscriptEdit

    # Scan transcript for timecodes
    self.load_timecodes()
    # Re-enable widget
    self.Enable(True)
    # Set the Transcript to Read Only initially so that the highlight will scroll as the media plays
    self.set_read_only(True)

    # If the transcript contains time codes ...
    if len(self.timecodes) > 0:
        # ... go to the first time code
        if self.scroll_to_time(self.timecodes[0]):
            # Get the style of the time code
            tmpStyle1 = self.GetStyleAt(self.GetInsertionPoint())
            # If the Time Code's style is HIDDEN ...
            if self.CompareFormatting(tmpStyle1, self.txtHiddenAttr, False):
                # ... display Time Codes
                self.show_codes()
            # Time codes should be showing now.
            self.codes_vis = 1
            # Go to the start of the Transcript
            self.GotoPos(0)
            # If we are in the Transcript Dialog, which HAS a toolbar ...
            # (When this is called from the Clip Properties form, there is no tool bar!)
            if isinstance(self.parent, TranscriptionUI_RTC._TranscriptPanel):
                # ... make sure the Toolbar's buttons reflect the current display
                self.parent.toolbar.ToggleTool(self.parent.parent.parent.parent.CMD_SHOWHIDE_ID, True)

    # Check Formatting to set initial Default and Basic Style info
    self.CheckFormatting()

    # Now that the transcript is loaded / imported, we can thaw the control
    self.EndSuppressUndo()
    self.Thaw()
    # Mark the Edit Control as unmodified.
    self.DiscardEdits()

    if isinstance(transcript, Transcript.Transcript):
        # Implement Minimum Transcript Width by setting size hints for the TranscriptionUI dialog
        self.parent.SetSizeHints(minH = 0, minW = self.TranscriptObj.minTranscriptWidth)

    # Destroy the Load Popup Dialog
    loadDlg.Destroy()

    # If Partial Transcript Editing is enabled, track the lines that are loaded.
    # (There is no Transcript Object to consult when a file name was loaded.)
    if TransanaConstants.partialTranscriptEdit and (self.TranscriptObj is not None):
        self.LinesLoaded = self.TranscriptObj.paragraphs
def UpdateCurrentContents(self, action):
    """ This method maintains a LIMITED load of data in the editor control, rather than having all
        the data present all the time.  In wxPython 2.9.4.0 and 3.0.0.0, the wxRichTextCtrl becomes
        VERY slow during editing for very large documents.  (eg. a 7000 line document can take 4
        seconds per key press near the beginning of the document!)

        Parameters:
          action   'EnterEditMode' to cut the control's contents down to a leading segment of the
                   transcript, or 'LeaveEditMode' to put back the lines that were left out.
                   Any other value does nothing.

        Does nothing when no Transcript Object is defined.  Updates self.LinesLoaded when
        entering edit mode. """
    # The number of lines that should be included in a transcript segment loaded into the editor
    numberOfLinesInControl = 200

    # If no Transcript Object is defined, we can skip this!
    if self.TranscriptObj is None:
        return
    transcriptObj = self.TranscriptObj

    # If we're entering edit mode, we need to limit the amount of text in the control ...
    if action == 'EnterEditMode':
        # Find the text line at a point near the bottom-left corner of the control.
        # (Evaluated once here; it is needed both for the test and for the line count.)
        lastVisibleLine = self.PositionToXY(self.HitTestPos((3, self.GetRect()[3] - 10))[1])[1]
        # ... and if the transcript is over numberOfLinesInControl lines long AFTER the current window ...
        if self.NumberOfLines - lastVisibleLine > numberOfLinesInControl:
            # ... round the line count UP to the next multiple of numberOfLinesInControl
            linesToLoad = lastVisibleLine - (lastVisibleLine % numberOfLinesInControl) + numberOfLinesInControl
            # If we should load fewer than ALL the lines ...
            if linesToLoad < transcriptObj.paragraphs:
                # Create a temporary popup dialog ...
                loadDlg = Dialogs.PopupDialog(self, _("Loading %d lines") % linesToLoad, _("Loading your transcript.\nPlease wait...."))
                # Gather the stored lines that make up the first linesToLoad paragraphs
                lineCount = transcriptObj.paragraphPointers[linesToLoad]
                text = ''.join([transcriptObj.lines[x] + '\n' for x in range(lineCount)])
                # Add closing XML to make our text a LEGAL XML document
                text += ' </paragraphlayout>\n'
                text += '</richtext>'
                # Load the truncated XML Data
                self.LoadXMLData(text, clearDoc=False)
                # Delete the popup dialog.
                loadDlg.Destroy()
                # Update the indicator for the number of lines loaded
                self.LinesLoaded = linesToLoad
            # If we should load ALL the lines ...
            else:
                self.LinesLoaded = transcriptObj.paragraphs
        # Otherwise, everything stays loaded
        else:
            self.LinesLoaded = transcriptObj.paragraphs

    # If we're leaving Edit mode, we need to recover the text that had been left out of the control ...
    elif action == 'LeaveEditMode':
        # If there are lines beyond what is currently in the Editor control ...
        if self.LinesLoaded > 0 and self.LinesLoaded != transcriptObj.paragraphs:
            # If the transcript has been changed ...
            if self.IsModified():
                # ... get the text currently in the control and break it into lines
                currentlines = self.GetFormattedSelection('XML').split('\n')
                # Drop the last TWO lines of the loaded text, as they close off the XML too early.
                # (Slice deletion does not fail if there are fewer than two lines.)
                del currentlines[-2:]
                # If the (formerly) 3rd to last line closes a ParagraphLayout XML tag set, drop that too
                if currentlines and currentlines[-1].strip() == '</paragraphlayout>':
                    del currentlines[-1]
                # Append all of the original Transcript that falls AFTER what is loaded in the Text Control
                firstUnloaded = transcriptObj.paragraphPointers[self.LinesLoaded]
                currentlines.extend([transcriptObj.lines[x] for x in range(firstUnloaded, len(transcriptObj.lines))])
                # Concatenate all the current LINES into the current TEXT
                currenttext = ''.join([line + '\n' for line in currentlines])
                # Now load the cumulated text into the Text Control
                self.LoadXMLData(currenttext, clearDoc=False)
                # Note that the text HAS changed in the Text Control
                self.MarkDirty()
            # If the transcript has NOT been changed ...
            else:
                # ... restore the original transcript's text to the Text Control
                self.LoadXMLData(transcriptObj.text, clearDoc = False)
def HideTimeCodeData(self):
    """ Hide the Time Code Data, which should NEVER be visible to users.

        Each time code character found in the text is given the Time Code style
        (self.txtTimeCodeAttr) and the data that follows it, through the closing ">",
        is given the Hidden style (self.txtHiddenAttr).  The control's selection is
        changed as a side effect. """
    # NOTE:  This will ONLY apply to RTF transcripts exported by Transana 2.42 and earlier.  By definition,
    #        these transcripts CANNOT have images, and therefore the offset values returned by FindText will
    #        be adequate.  Transcripts that might have time codes AND images will have the time codes already
    #        formatted correctly, so if some time codes get skipped below, it doesn't matter.

    # Search from the beginning of the text to its end
    endPos = self.GetTextLength()
    # Find the first time code character
    nextTC = self.FindText(0, endPos, TIMECODE_CHAR)
    # As long as there are additional time codes to find ...
    while nextTC > -1:
        # Find the END of the time code we just found
        endTC = self.FindText(nextTC, endPos, '>')
        # If there is no closing bracket anywhere after this time code, there is no
        # time code data to hide here or further on.  Stop, rather than using the
        # "not found" value as a text position and as the start of the next search.
        if endTC < 0:
            break
        # Select the time code data
        self.SetSelection(nextTC + 1, endTC + 1)
        # NOTE:  That doesn't work quite right for transcripts exported to RTF from Transana 2.50.  We
        #        need an additional correction here!  Otherwise, every other time-coded segment is hidden!
        #
        # Get the TEXT of the current selection
        tmp = self.GetStringSelection()
        closePos = tmp.find('>')
        # If there is a ">" character BEFORE THE END of the text ...
        if len(tmp) > closePos + 1:
            # ... then we need to CORRECT the END TIME CODE position marker.
            endTC = nextTC + closePos + 1
        # Format the Time Code using the Time Code style
        self.SetStyle(richtext.RichTextRange(nextTC, nextTC + 1), self.txtTimeCodeAttr)
        # Format the Time Code data using the Hidden style
        self.SetStyle(richtext.RichTextRange(nextTC + 1, endTC + 1), self.txtHiddenAttr)
        # Find the next time code, if there is one
        nextTC = self.FindText(endTC, endPos, TIMECODE_CHAR)
def load_timecodes(self):
    """Scan the document for timecodes and add to internal list.

    Rebuilds self.timecodes as the list of integer values found between each
    time code marker followed by "<" and the next ">".  Time codes whose data
    is not a valid integer are skipped. """
    # Clear the existing time codes list
    self.timecodes = []
    # Get the text to scan
    txt = self.GetText()
    # Define the string to search for
    findstr = TIMECODE_CHAR + "<"
    # Locate the first time code
    i = txt.find(findstr)
    # As long as there are more time codes to find ...
    while i >= 0:
        # ... find the END of the time code data, i.e. the next ">" character
        endi = txt.find(">", i)
        # Without a closing bracket there is no complete time code here, nor can there
        # be one later in the text.  Stop, rather than slicing with the "not found"
        # value, which would silently drop the last character of the remaining text.
        if endi < 0:
            break
        # Extract the Time Code Data that follows the search string
        timestr = txt[i + len(findstr):endi]
        try:
            # Convert the time code data to an integer and add it to the TimeCodes list
            self.timecodes.append(int(timestr))
        except ValueError:
            # The data is not a number.  Ignore that time code; it's probably defective.
            pass
        # Look for the next time code.  Result will be -1 if NOT FOUND
        i = txt.find(findstr, i + 1)
def save_transcript(self, continueEditing=True):
    """ Save the transcript to the database.

        Parameters:
          continueEditing   used for Partial Transcript Editing only.  When True (the
                            default) and Partial Transcript Editing is enabled, the
                            control goes back to holding a partial transcript after
                            the save.

        Exceptions raised by the save are re-raised to the caller.  Time code display
        state and the cursor position are restored whether or not the save succeeds. """
    # Tell the user about the save (needed for large files)
    self.saveDlg = Dialogs.PopupDialog(None, _("Saving..."), _("Saving your transcript.\nPlease wait...."))
    # Remember the cursor position
    self.SaveCursor()

    # With Partial Transcript editing, the control may hold only part of the
    # transcript.  The full transcript has to be back in place before saving.
    if TransanaConstants.partialTranscriptEdit:
        self.UpdateCurrentContents('LeaveEditMode')

    # Remember whether Time Codes were visible.  If they were not, show them
    # for the duration of the save.
    codesWereVisible = self.codes_vis
    if not codesWereVisible:
        self.show_codes()

    # Remember whether Time Code Values were visible.  If they were, hide them
    # for the duration of the save.
    valuesWereVisible = self.timeCodeDataVisible
    if self.timeCodeDataVisible:
        self.changeTimeCodeValueStatus(False)

    try:
        # Only a defined Transcript Object can be saved
        if self.TranscriptObj:
            transcriptObj = self.TranscriptObj
            # Note whether the transcript has changed
            transcriptObj.has_changed = self.modified()
            # Store the transcript data in XML format
            transcriptObj.text = self.GetFormattedSelection('XML')
            # Store the Document Length in Characters (for Documents)
            transcriptObj.document_length = self.GetLength()
            # Write it to the database
            transcriptObj.db_save()
    except TransanaExceptions.SaveError:
        # Save Errors go to the caller without being reported here
        raise
    except:
        # Anything else is reported on stdout, then passed on to the caller
        print("TranscriptEditor_RTC.save_transcript():")
        print(sys.exc_info()[0])
        print(sys.exc_info()[1])
        import traceback
        traceback.print_exc()
        raise
    # We need to finish this even if an exception is raised!
    finally:
        # If Time Codes were shown just for the save, hide them again.
        if not codesWereVisible:
            self.hide_codes()
        # If Time Code Values were hidden just for the save, show them again.
        if valuesWereVisible:
            self.changeTimeCodeValueStatus(True)
        # Restore the Cursor Position
        self.RestoreCursor()
        # Mark the Edit Control as unmodified.
        self.DiscardEdits()
        # Destroy the Save Popup Dialog
        self.saveDlg.Destroy()

    # With Partial Transcript editing, go back to the partial transcript state
    # following the save if editing is to continue.
    if TransanaConstants.partialTranscriptEdit and continueEditing:
        self.UpdateCurrentContents('EnterEditMode')
def export_transcript(self, fname):
    """Export the transcript to a file.

    Parameters:
      fname   the name of the file to write.  A name ending in ".rtf" is written
              in Rich Text Format and one ending in ".xml" in XML format (the
              test is case-insensitive).  Nothing is written for other names.

    If the transcript contains time codes, the user is asked whether to include
    them.  If not, they are stripped from the control's contents for the export
    and the original contents and modified status are put back afterwards. """
    # If Partial Transcript editing is enabled ...
    if TransanaConstants.partialTranscriptEdit:
        # If we have only part of the transcript in the editor, we need to restore the full transcript
        self.UpdateCurrentContents('LeaveEditMode')
        self.Refresh()

    # See if there are any time codes in the text.  If not ...
    if self.timecodes == []:
        # ... then we can ignore the whole issue of time-code stripping
        result = wx.ID_YES
    # If there ARE time codes ...
    else:
        # We want to ask the user whether we should include time codes or not.  Create the prompt
        prompt = unicode(_("Do you want to include Transana Time Codes (and their hidden data) in the file?\n(This preserves time codes when you re-import the transcript into Transana.)"), "utf8")
        # Create a dialog box for the question
        dlg = Dialogs.QuestionDialog(self.parent, prompt)
        # Display the dialog box and get the user response
        result = dlg.LocalShowModal()
        # Destroy the dialog box
        dlg.Destroy()

    # Note whether time codes are to be left out of the export
    stripTimeCodes = (result == wx.ID_NO)

    # If the user does NOT want Time Codes ...
    if stripTimeCodes:
        # Remember the Edited Status of the RTC
        isModified = self.IsModified()
        # Get the contents of the RTC buffer
        originalText = self.GetFormattedSelection('XML')
        # Remove the Time Codes and put the altered contents into the control
        self._exportReloadBuffer(self.StripTimeCodes(originalText))

    # The file name's extension selects the output format
    extension = fname[-4:].lower()
    # If saving an RTF file ...
    if extension == '.rtf':
        # ... save the document in Rich Text Format
        self.SaveRTFDocument(fname)
    # If saving an XML file ...
    elif extension == '.xml':
        # ... save the document in XML format
        self.SaveXMLDocument(fname)

    # If the user did NOT want Time Codes ...
    if stripTimeCodes:
        # ... we need to put the original contents of the buffer back into the control!
        self._exportReloadBuffer(originalText)
        # If the RTC was modified BEFORE this export ...
        if isModified:
            # ... then mark it as modified
            self.MarkDirty()
        # If the RTC was NOT modified before this export ...
        else:
            # ... then mark it as clean.  (yeah, one of these is probably not needed.)
            self.DiscardEdits()

    # If Partial Transcript editing is enabled ...
    if TransanaConstants.partialTranscriptEdit:
        # If we have only part of the transcript in the editor, we need to restore the partial transcript state following save
        self.UpdateCurrentContents('EnterEditMode')

def _exportReloadBuffer(self, xmlText):
    """ Replace the control's buffer contents with the given XML text.  Used by export_transcript().

        This is best-effort:  a failure is reported on stdout / stderr and not raised. """
    # (The RichTextXMLHandler automatically clears the RTC.)
    try:
        # Create an IO String of the XML text
        stream = cStringIO.StringIO(xmlText)
        # Create an XML Handler
        handler = richtext.RichTextXMLHandler()
        # Load the XML text via the XML Handler.
        # Note that for XML, the RTC BUFFER is passed.
        handler.LoadStream(self.GetBuffer(), stream)
    except Exception:
        import traceback
        print("XML Handler Load failed")
        print("")
        print("%s %s" % (sys.exc_info()[0], sys.exc_info()[1]))
        # print_exc() writes the traceback itself and returns None, so its result is not printed
        traceback.print_exc()
        print("")
def set_font(self, font_face, font_size, font_fg=0x000000, font_bg=0xffffff):
    """ Change the current font or the font for the selected text.

        All four values (face, size, foreground color, background color) are
        handed on unchanged, in that order, to SetFont(). """
    # Bundle the font specification and delegate to the control's SetFont()
    font_spec = (font_face, font_size, font_fg, font_bg)
    self.SetFont(*font_spec)
def set_bold(self, enable=-1):
    """ Set bold state for current font or for the selected text.

        When enable is left at its default of -1, the current bold state (as
        reported by GetBold()) is toggled.  Any other value is passed straight
        to SetBold().  Nothing happens when the control is read-only. """
    # Only an editable control may have its formatting changed
    if not self.get_read_only():
        # -1 means "toggle"; anything else is the explicit state requested
        new_state = (not self.GetBold()) if enable == -1 else enable
        self.SetBold(new_state)
def get_bold(self):
    """ Return the BOLD setting as reported by GetBold() """
    is_bold = self.GetBold()
    return is_bold
def set_italic(self, enable=-1):
    """ Set italic state for current font or for the selected text.

        When enable is left at its default of -1, the current italic state (as
        reported by GetItalic()) is toggled.  Any other value is passed straight
        to SetItalic().  Nothing happens when the control is read-only. """
    # Only an editable control may have its formatting changed
    if not self.get_read_only():
        # -1 means "toggle"; anything else is the explicit state requested
        new_state = (not self.GetItalic()) if enable == -1 else enable
        self.SetItalic(new_state)
def get_italic(self):
    """ Return the ITALICS setting as reported by GetItalic() """
    is_italic = self.GetItalic()
    return is_italic
def set_underline(self, enable=-1):
    """ Set underline state for current font or for the selected text.

        When enable is left at its default of -1, the current underline state (as
        reported by GetUnderline()) is toggled.  Any other value is passed straight
        to SetUnderline().  Nothing happens when the control is read-only. """
    # Only an editable control may have its formatting changed
    if not self.get_read_only():
        # -1 means "toggle"; anything else is the explicit state requested
        new_state = (not self.GetUnderline()) if enable == -1 else enable
        self.SetUnderline(new_state)
def get_underline(self):
    """ Return the UNDERLINE setting as reported by GetUnderline() """
    is_underlined = self.GetUnderline()
    return is_underlined
def show_codes(self, showPopup=False):
    """ Make encoded text (the time code marks) in the document visible.

        Parameters:
          showPopup  --  if true, a "please wait" popup is displayed while the
                         time code styles are being changed (needed for large files)

        Sets self.codes_vis to 1 once the styles have been changed. """
    # No popup unless the caller asks for one
    tmpDlg = None
    if showPopup:
        # Create a popup telling the user about the change (needed for large files)
        tmpDlg = Dialogs.PopupDialog(None, _("Showing Time Codes..."), _("Showing time codes.\nPlease wait...."))
    try:
        # Switch the time code marks to the visible style
        self.changeTimeCodeHiddenStatus(False)
        # Remember that time codes are now showing
        self.codes_vis = 1
    finally:
        # Destroy the Popup Dialog even if the style change failed, so a stale
        # "please wait" window is never left on screen
        if tmpDlg is not None:
            tmpDlg.Destroy()
def hide_codes(self, showPopup=False):
    """ Make encoded text (the time code marks) in the document hidden.

        Parameters:
          showPopup  --  if true, a "please wait" popup is displayed while the
                         time code styles are being changed (needed for large files)

        Sets self.codes_vis to 0 once the styles have been changed. """
    # No popup unless the caller asks for one
    tmpDlg = None
    if showPopup:
        # Create a popup telling the user about the change (needed for large files)
        tmpDlg = Dialogs.PopupDialog(None, _("Hiding Time Codes..."), _("Hiding time codes.\nPlease wait...."))
    try:
        # Switch the time code marks to the hidden style
        self.changeTimeCodeHiddenStatus(True)
        # Remember that time codes are now hidden
        self.codes_vis = 0
    finally:
        # Destroy the Popup Dialog even if the style change failed, so a stale
        # "please wait" window is never left on screen
        if tmpDlg is not None:
            tmpDlg.Destroy()
def show_timecodevalues(self, visible):
    """ Make Time Code values in Human Readable form visible or hidden.

        Parameters:
          visible  --  true to show the time code values, false to hide them

        A "please wait" popup is displayed for the duration of the change and is
        always destroyed afterwards, even if the change raises an exception. """
    # Create a popup telling the user about the change (needed for large files)
    if not visible:
        tmpDlg = Dialogs.PopupDialog(None, _("Hiding Time Codes..."), _("Hiding time code values.\nPlease wait...."))
    else:
        tmpDlg = Dialogs.PopupDialog(None, _("Showing Time Codes..."), _("Showing time code values.\nPlease wait...."))
    try:
        # Just passing through.
        self.changeTimeCodeValueStatus(visible)
    finally:
        # Destroy the Popup Dialog, whether or not the change succeeded
        tmpDlg.Destroy()
def codes_visible(self):
    """ Report the stored time code visibility flag (self.codes_vis); 1 means encoded text is visible. """
    visibility_flag = self.codes_vis
    return visibility_flag
def changeTimeCodeHiddenStatus(self, hiddenVal):
""" Changes the Time Code marks (but not the time codes themselves) between visible and invisible styles.

Parameters:
hiddenVal  --  if true, each time code mark is given the hidden style (self.txtHiddenAttr);
otherwise it is given the time code style (self.txtTimeCodeAttr)

The cursor position / selection in effect on entry is restored afterwards.  If self.modified()
reported the transcript as unmodified on entry, DiscardEdits() is called at the end so that the
style changes made here do not leave the transcript flagged as edited. """
# Let's also remember if the transcript has already been modified. This value WILL get changed, but maybe it shouldn't be.
initModified = self.modified()
# Let's try to remember the cursor position. (self.SaveCursor() doesn't work here!)
(savedPosition, savedSelection) = (self.GetCurrentPos(), self.GetSelection())
# Move the cursor to the beginning of the document
self.GotoPos(0)
# Let's find each time code mark and update it with the new style.
for tc in self.timecodes:
# Find the Timecode. scroll_to_time() adjusts the time code by 2 ms, so we have to compensate for that here!
# (Time codes for which scroll_to_time() returns a false value are left untouched.)
if self.scroll_to_time(tc - 2):
# Note the Cursor's Current Position
curpos = self.GetCurrentPos() + 1
# The time code in position 0 of the document doesn't get hidden correctly! This adjusts for that!
if curpos < 1:
curpos = 1
# Get the range for the Time Code character itself. It starts the character BEFORE the insertion point.
r = richtext.RichTextRange(curpos - 1, curpos)
# If we're hiding time codes ...
if hiddenVal:
# ... set its style to Hidden
self.SetStyle(r, self.txtHiddenAttr)
# If we're displaying time codes ...
else:
# ... set its style to Time Code
self.SetStyle(r, self.txtTimeCodeAttr)
# Restore the Cursor Position when all is said and done. (self.RestoreCursor() doesn't work!)
# If there's no saved Selection (a saved value of (-2, -2) is treated here as "nothing selected") ...
if savedSelection == (-2, -2):
# ... make a small selection based on the saved position. If it's not the last character in the document ...
if savedPosition < self.GetLastPosition():
# ... select the next character
self.SetSelection(savedPosition, savedPosition+1)
# If it IS the last character and the transcript has at least one character ...
elif savedPosition > 0:
# ... select the previous character
self.SetSelection(savedPosition - 1, savedPosition)
# Show the current selection
self.ShowCurrentSelection()
# Now clear the selection we just made
self.SetCurrentPos(savedPosition)
# if there IS a saved selection ...
else:
# ... select what used to be selected ...
self.SetSelection(savedSelection[0], savedSelection[1])
# ... and show the current selection
self.ShowCurrentSelection()
# Start Exception Handling
try:
# Update the Transcript Control
self.Update()
# If there's a PyAssertionError ...
# NOTE(review): this is the Python 2 "except Type, name" form; the bound name x is never used.
except wx._core.PyAssertionError, x:
# ... we can safely ignore it!
pass
# If we did not think the document was modified before we changed the time code styles ...
if not initModified:
# ... then mark the data as unchanged.
self.DiscardEdits()
def changeTimeCodeValueStatus(self, visible):
""" Change visibility of the Time Code Values """
# Set the Wait cursor
self.parent.SetCursor(wx.StockCursor(wx.CURSOR_WAIT))
# We can change this even if the Transcript is read-only, but we need to remember the current
# state so we can return the transcript to read-only if needed.
# The whole SaveCursor() / RestoreCursor() thing doesn't work well here, because the character numbers just change too
# much. Let's maintain transcript position by time code instead.
(tcBefore, tcAfter) = self.get_selected_time_range()
initReadOnly = self.get_read_only()
# Let's also remember if the transcript has already been modified. This value WILL get changed, but maybe it shouldn't be.
initModified = self.modified()
# Let's show all the hidden text of the time codes. This doesn't work without it!
if not self.codes_vis:
# Remember that time codes were hidden ...
wereHidden = True
# ... and show them
self.show_all_hidden()
else:
# Remember that time codes were showing
wereHidden = False
# Put the transcript into Edit mode so we can change it.
self.set_read_only(False)
# Let's iterate through every pre-defined regular expression about time codes. (I think there's only one. I'm not sure why Nate did it this way.)
for tcs in self.HIDDEN_REGEXPS:
# Get a list(?) of all the time code sequences in the text
tcSequences = tcs.findall(self.GetText())
# Initialize the Time Code End position to zero.
tcEndPos = 0
# Now iterate through each Time Code in the RegEx list
for TC in tcSequences:
# Find the next Time Code in the RTF control, starting at the end point of the previous time code for efficiency's sake.
tcStartPos = self.GetValue().find(TC, tcEndPos, self.GetLength()) # self.FindText(tcEndPos, self.GetLength(), TC)
# Remember the end point of the current time code, used to start the next search.
tcEndPos = self.GetValue().find('>', tcStartPos, self.GetLength()) # self.FindText(tcStartPos, self.GetLength(), '>') + 1
tcEndPosAdjusted = self.FindText(tcStartPos, self.GetLength(), TC) + len(TC)
# Move the cursor to the end of the time code's hidden data
self.GotoPos(tcEndPosAdjusted)
# Build the text of the time value. Take parentheses, and add the conversion of the time code data, which is extracted from
# the Time Code from the RegEx.
text = '(' + Misc.time_in_ms_to_str(int(TC[2:-1])) + ')'
# Note the length of the time code text
lenText = len(text)
# print "TrancriptEditor_RTC.changeTimeCodeValueStatus():", tcCounter, len(tcSequences)
# If we're going to SHOW the time code data ...
if visible:
# Insert the text
self.WriteText(text)
self.SetStyle(richtext.RichTextRange(tcEndPosAdjusted, tcEndPosAdjusted + lenText), self.txtTimeCodeHRFAttr)
# If we're going to HIDE the time code data ...
else:
# Let's look at the end of the time code for the opening parenthesis character. This probably signals that the user hasn't
# messed with the text, which they could do. If they mess with it, they're stuck with it!
if self.GetCharAt(tcEndPosAdjusted) == ord('('):
hrtcStartPos = tcEndPosAdjusted
hrtcEndPos = tcEndPosAdjusted + lenText # self.GetValue().find(')', hrtcStartPos, self.GetLength())
# Select the character following the time code data ...
self.SetSelection(hrtcStartPos, hrtcEndPos)
# ... and get rid of it!
self.DeleteSelection()
# Change the Time Code Data Visible flag to indicate the new state
self.timeCodeDataVisible = visible
# We better hide all the hidden text for the time codes again, if they were hidden.
if wereHidden:
self.hide_all_hidden()
# If we were in read-only mode ...
if initReadOnly:
# ... return to read-only mode
self.set_read_only(True)
# If we did not think the document was modified before we showed the time code data ...
if not initModified:
# ... then mark the data as unchanged.
self.DiscardEdits()
# Restore the normal cursor
self.parent.SetCursor(wx.StockCursor(wx.CURSOR_ARROW))
# The cursor position is lost because of the change of the text. Go to the start of the document.
self.SetInsertionPoint(0)
# Call CheckFormatting to update the Format / Font values
self.CheckFormatting()
# Scroll to the starting time code position to make the proper segment of the transcript visible
self.scroll_to_time(tcBefore)
# Start Exception Handling
try:
# Update the Transcript Control
self.Update()