-
Notifications
You must be signed in to change notification settings - Fork 0
/
tv-subtitles.py
executable file
·451 lines (354 loc) · 12.3 KB
/
tv-subtitles.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
#!/usr/bin/env python3
"""
Rename subtitle files and move them parallel to video files for a TV series.
Produces exactly one English SRT subtitle per episode.
"""
from __future__ import annotations
import argparse
from functools import total_ordering
import logging
import os.path
from pathlib import Path
from pprint import pprint as pp
import re
import shutil
import sys
# Module-wide logger. Calling getLogger() without a name returns the root
# logger, which the script entry point configures via logging.basicConfig().
logger = logging.getLogger()
@total_ordering
class EpisodeName:
    """
    File name of a single episode.

    Wraps the plain file name and knows how to extract the 'S00E00' key from
    it and how to build the matching subtitle file name. Instances compare,
    order and hash by their file name.
    """

    KEY_REGEX = re.compile(r'S(\d+)E(\d+)', flags=re.IGNORECASE)
    SUBTITLE_SUFFIX = '.srt'

    def __init__(self, name: str):
        """
        Initialiser.

        Args:
            name:
                File name of the episode, eg. 'Show.S01E02.mkv'
        """
        self.name = name

    def get_key(self) -> str|None:
        """
        Extract episode's 'key' from file name.

        The key is an upper-case string in the format 'S00E00', which
        uniquely identifies an episode within a series.

        Returns:
            The key if found, otherwise None.
        """
        match = self.KEY_REGEX.search(self.name)
        if match is None:
            return None
        # The regex is case-insensitive, so normalise eg. 's01e02' -> 'S01E02'
        return match.group(0).upper()

    def get_subtitle_name(self) -> str:
        """
        Build expected subtitle file name for episode.

        Returns:
            The episode's file name with its suffix replaced by '.srt'.
        """
        base, _ = os.path.splitext(self.name)
        return f"{base}{self.SUBTITLE_SUFFIX}"

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, EpisodeName):
            return NotImplemented
        return self.name == other.name

    def __hash__(self) -> int:
        # Defining __eq__ alone sets __hash__ to None, which would make
        # instances unusable in sets and as dict keys. Hash on the same
        # attribute that equality compares.
        return hash(self.name)

    def __lt__(self, other: EpisodeName) -> bool:
        if not isinstance(other, EpisodeName):
            return NotImplemented
        return self.name < other.name

    def __repr__(self) -> str:
        return f"<{self.__class__.__name__}: {self.name!r}>"

    def __str__(self) -> str:
        return self.name
class Folder:
    """
    Snapshot of the immediate contents of a folder.
    """

    def __init__(self, folder: Path):
        """
        Initialiser.

        Stores the folder as `root`, then records the names of its immediate
        sub-folders and files in the `folders` and `files` properties.

        Raises:
            RuntimeError:
                If the given path is not a folder.

        Returns:
            None
        """
        if not folder.is_dir():
            raise RuntimeError(f"Path is not a folder: {folder}")

        self.root = folder
        contents = self._read_contents(self.root)
        self.folders, self.files = contents

    def _read_contents(
        self,
        root: Path
    ) -> tuple[tuple[str, ...], tuple[str, ...]]:
        """
        List the names of the folders and files directly under root.

        Entries whose name starts with a dot are skipped. Anything that is
        neither a folder nor a regular file is logged and ignored.

        Returns:
            Pair of tuples (folders, files), each sorted case-insensitively.
        """
        folder_names: list[str] = []
        file_names: list[str] = []

        visible = (
            entry for entry in root.iterdir()
            if not entry.name.startswith('.')
        )
        for entry in visible:
            if entry.is_dir():
                folder_names.append(entry.name)
                continue
            if entry.is_file():
                file_names.append(entry.name)
                continue
            logger.warning(f"Ignoring non-regular file: {entry}")

        return (
            tuple(sorted(folder_names, key=str.casefold)),
            tuple(sorted(file_names, key=str.casefold)),
        )
class SeriesFolder(Folder):
    """
    A folder containing a TV series.

    Expects multiple video files using the 'S00E00.' naming convention,
    as well as various other subtitle and metadata files.
    """

    SUBTITLE_SUFFIX = '.srt'
    VIDEO_SUFFIXES = ('.mkv', '.mp4', '.webm')

    def __init__(self, folder: Path):
        """
        Initialiser.

        Reads directory contents and runs some basic sanity checks on its contents.

        Args:
            folder:
                Path to the folder holding the episodes.

        Raises:
            RuntimeError:
                If given folder does not appear to contain a TV series, ie.
                it is not a folder or holds fewer than two episodes.
        """
        super().__init__(folder)
        self.episodes = self._find_episodes(self.files)
        # Search the folder we were given, not the command-line global
        # `options`, which only exists when this file is run as a script.
        self.subtitle_finder = SubtitleFinder(folder)

        if len(self.episodes) < 2:
            raise RuntimeError(f"Folder doesn't contain episodes: {folder}")
        logger.info("%s episodes found in %r", len(self.episodes), folder.name)

    def __repr__(self) -> str:
        return self.__str__()

    def __str__(self) -> str:
        return f"<{self.__class__.__name__}: {self.root}>"

    def copy_subtitle(self, episode: EpisodeName, subtitle: Path) -> None:
        """
        Rename and copy subtitle file to the expected location.

        The copy is placed directly under the series folder, named after the
        episode with a '.srt' suffix.

        Args:
            episode:
                Episode name.
            subtitle:
                Path to subtitle file.

        Returns:
            Nothing.
        """
        destination = self.root / episode.get_subtitle_name()
        shutil.copy2(subtitle, destination)
        logger.info("Create: %s", destination.name)

    def find_subtitle(self, episode: EpisodeName) -> Path:
        """
        Find a single 'srt' subtitle file for the given episode.

        Delegates to the subtitle finder created in the initialiser.

        Args:
            episode:
                Episode name.

        Returns:
            Path to the chosen subtitle file.
        """
        return self.subtitle_finder.find_subtitle(episode)

    def has_every_subtitle(self) -> bool:
        """
        Does every episode in the folder have a subtitle?

        Returns:
            True only if a properly-named subtitle exists for all episodes.
        """
        return all(self.has_subtitle(episode) for episode in self.episodes)

    def has_subtitle(self, episode: EpisodeName) -> bool:
        """
        Does a properly-named subtitle exist for the given episode?

        Args:
            episode:
                Episode name.

        Returns:
            Whatever the subtitle finder reports for this episode.
        """
        return self.subtitle_finder.has_subtitle(episode)

    def _find_episodes(self, files: tuple[str, ...]) -> list[EpisodeName]:
        """
        Find video files that match the 'S00E00' convention.

        Video files are identified just by suffix.

        Args:
            files:
                Tuple of file names.

        Raises:
            RuntimeError:
                If duplicate episode keys are encountered.

        Returns:
            Sorted list of episode file names.
        """
        episodes: list[EpisodeName] = []
        seen: set[str] = set()

        # Honour the argument rather than silently re-reading self.files
        for name in files:
            _, suffix = os.path.splitext(name)
            if suffix not in self.VIDEO_SUFFIXES:
                continue

            episode = EpisodeName(name)
            key = episode.get_key()
            if key is None:
                # Video file without an 'S00E00' key, eg. a trailer
                continue
            if key in seen:
                message = f"Duplicate episode video files found for {key!r}"
                raise RuntimeError(message)

            episodes.append(episode)
            seen.add(key)

        episodes.sort()
        return episodes
class SubtitleFinder(Folder):
    """
    Locates subtitle files for episodes that live directly under a folder.

    Looks first for a subtitle already sitting next to the episode, then
    falls back to searching a 'Subs' sub-folder.
    """

    SUBTITLE_MIN_SIZE = 10_000  # bytes
    SUBTITLE_SUFFIX = '.srt'

    def find_subtitle(self, episode: EpisodeName) -> Path:
        """
        Pick best subtitle for given episode.

        A lone candidate is returned as-is. Otherwise the candidates are
        narrowed to English-looking file names, then to files larger than
        `SUBTITLE_MIN_SIZE`. If exactly two remain, the smaller one wins.

        TODO:
            Only handles finding exactly one subtitle file currently.

        Args:
            episode:
                Name of episode file.

        Raises:
            RuntimeError:
                If no subtitle file can be found.
            NotImplementedError:
                If filtering does not leave exactly one candidate.

        Returns:
            Path to subtitle file.
        """
        subtitles = self.list_subtitles(episode)
        if not subtitles:
            raise RuntimeError(f"No subtitle file could be found for {episode!r}")

        # Only one?
        if len(subtitles) == 1:
            return subtitles[0]

        # Narrow by language
        subtitles = self.filter_language(subtitles)

        # Drop files that are too-small
        subtitles = self.filter_small(subtitles)

        # Exactly two? Drop largest (the first is kept when sizes tie)
        if len(subtitles) == 2:
            subtitles = [min(subtitles, key=os.path.getsize)]

        # Only one again?
        if len(subtitles) == 1:
            return subtitles[0]

        raise NotImplementedError(f"Found {len(subtitles)} subtitles for {episode}")

    def filter_language(self, paths: list[Path]) -> list[Path]:
        """
        Filter out non-english subtitles.

        Args:
            paths:
                Candidate subtitle files.

        Returns:
            The paths whose file name contains 'eng', ignoring case.
        """
        # 'english' always contains 'eng', so a single test covers both
        return [path for path in paths if 'eng' in path.name.casefold()]

    def filter_small(self, paths: list[Path]) -> list[Path]:
        """
        Drop subtitles that are too small.

        Args:
            paths:
                Candidate subtitle files.

        Returns:
            The paths whose size is strictly greater than `SUBTITLE_MIN_SIZE`.
        """
        return [
            path for path in paths
            if os.path.getsize(path) > self.SUBTITLE_MIN_SIZE
        ]

    def has_subtitle(self, episode: EpisodeName) -> bool:
        """
        Does a subtitle file exist for the given episode?

        Args:
            episode:
                File name of episode within folder

        Return:
            True if a matching subtitle exists for the given episode.
        """
        subtitle_path = self.root / episode.get_subtitle_name()
        return subtitle_path.is_file()

    def list_subtitles(self, episode: EpisodeName) -> list[Path]:
        """
        List all available subtitle files for the given episode.

        Args:
            episode:
                Name of episode file.

        Returns:
            List of paths to subtitle files.
        """
        # In its proper place?
        if self.has_subtitle(episode):
            subtitle = self.root / episode.get_subtitle_name()
            assert subtitle.is_file(), f"Subtitle file not found: {subtitle}"
            return [subtitle]

        # Start looking around
        subtitles: list[Path] = []
        episode_key = episode.get_key()
        assert isinstance(episode_key, str)

        # 'Subs' folder?
        found = self._find_subtitles_in_subs_folder(episode_key)
        subtitles.extend(found)

        return subtitles

    def _find_subtitles_in_subs_folder(self, episode_key: str) -> list[Path]:
        """
        Subtitles found in 'Subs' folder underneath episodes.

        An entry in the 'Subs' folder matches when its name carries the same
        'S00E00' key. A matching folder contributes every '*.srt' file
        directly inside it; a matching file is used as-is.

        Args:
            episode_key:
                Plain string, eg. 'S02E13'

        Raises:
            RuntimeError:
                If a matching entry is neither a folder nor a regular file.

        Return:
            Possibly empty list of paths.
        """
        # Abort early if no 'subs' folder found
        subfolder = None
        for name in self.folders:
            if name.casefold() == 'subs':
                subfolder = self.root / name
        if subfolder is None:
            return []

        # Look in subs folder. Directory listings come back in arbitrary
        # order, so sort them to make the result deterministic.
        subtitles: list[Path] = []
        for entry in sorted(subfolder.iterdir()):
            entry_key = EpisodeName(entry.name).get_key()
            if entry_key != episode_key:
                continue

            # Match found
            if entry.is_dir():
                subtitles.extend(sorted(entry.glob(f"*{self.SUBTITLE_SUFFIX}")))
            elif entry.is_file():
                subtitles.append(entry)
            else:
                raise RuntimeError(f"Non-regular file found: {entry}")

        logger.debug(
            "%s subtitle(s) found for %s under '%s/%s/'",
            len(subtitles),
            episode_key,
            subfolder.parent.name,
            subfolder.name,
        )
        return subtitles
def argparse_existing_folder(string: str) -> Path:
    """
    An `argparse` type to convert string to a `Path` object.

    The path has '~' expanded and is resolved before being checked.

    Args:
        string:
            Raw command-line argument.

    Raises:
        argparse.ArgumentTypeError:
            If the path does not exist, or exists but is not a folder.

    Returns:
        The resolved path to an existing folder.
    """
    path = Path(string).expanduser().resolve()

    # Check existence first and stop there: is_dir() is also False for a
    # missing path, which would otherwise mask this more specific message.
    if not path.exists():
        raise argparse.ArgumentTypeError(f"Folder does not exist: {path}")
    if not path.is_dir():
        raise argparse.ArgumentTypeError(f"Path is not a folder: {path}")

    return path
def main(options: argparse.Namespace) -> int:
    """
    Copy a subtitle into place for every episode that lacks one.

    Args:
        options:
            Parsed command-line options; only `folder` is read.

    Returns:
        Process exit status, always zero.
    """
    series = SeriesFolder(options.folder)

    if series.has_every_subtitle():
        print("All subtitles in place, exiting.")
        return 0

    # Lazily skip episodes whose subtitle is already in place
    missing = (
        episode for episode in series.episodes
        if not series.has_subtitle(episode)
    )
    for episode in missing:
        found = series.find_subtitle(episode)
        series.copy_subtitle(episode, found)

    return 0
def parse(arguments: list[str]) -> argparse.Namespace:
    """
    Parse command-line arguments.

    Args:
        arguments:
            Argument strings, without the program name, eg. `sys.argv[1:]`

    Returns:
        Namespace with a single `folder` attribute holding the path to an
        existing folder.
    """
    description = "Rename and move subtitle files for TV series"
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument(
        'folder',
        metavar='FOLDER',
        type=argparse_existing_folder,
        help='folder to look under',
    )
    # Parse what the caller handed us instead of implicitly re-reading
    # sys.argv, so the function honours its own parameter.
    return parser.parse_args(arguments)
if __name__ == '__main__':
    # Script entry point: bare-message logging at DEBUG level, then run.
    logging.basicConfig(format="%(message)s", level=logging.DEBUG)

    # `options` stays a module-level name; other code in this file may look
    # it up as a global.
    options = parse(sys.argv[1:])
    retval = main(options)
    raise SystemExit(retval)