-
Notifications
You must be signed in to change notification settings - Fork 0
/
comicmetalink.py
205 lines (171 loc) · 7.84 KB
/
comicmetalink.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
import os
import re
import zipfile
import xml.etree.ElementTree as ET
import requests
from collections import defaultdict
import logging
import concurrent.futures
import argparse
import gc
import unidecode
# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Configuration of source and destination directories
source_directories = [
'/path/to/your/comics/'
]
destination_root = '/path/to/your/links/' # Destination directory for symlinks
discord_webhook_url = 'https://your_webhook_address' # Create a webhook (Eg: on discord) and put the link here
# Function to clean file and directory names
def clean_name(name):
# Remove special characters except underscores, dashes, spaces, and apostrophes
name = re.sub(r'[^\w\s\-\'À-ÿ]', '', name)
name = name.replace(' ', '_') # Replace spaces with underscores
name = unidecode.unidecode(name) # Remove accents
return name
# Function to read and extract content from ComicInfo.xml
def extract_comicinfo_from_cbz(cbz_path):
try:
with zipfile.ZipFile(cbz_path, 'r') as zip_ref:
if 'ComicInfo.xml' in zip_ref.namelist():
with zip_ref.open('ComicInfo.xml') as file:
tree = ET.parse(file)
root = tree.getroot()
series = root.find('Series')
title = root.find('Title')
series_text = series.text if series is not None else 'N/A'
title_text = title.text if title is not None else 'N/A'
logging.info(f"Extracted from {cbz_path} - Series: {series_text}, Title: {title_text}")
return {
"series": series_text,
"title": title_text
}
else:
logging.warning(f"ComicInfo.xml not found in {cbz_path}")
return None
except Exception as e:
logging.error(f"Error processing {cbz_path}: {e}")
return None
# Function to create symlinks
def create_symlink(source, dest):
try:
if os.path.exists(source):
if not os.path.exists(dest):
os.symlink(source, dest)
logging.info(f'Symlink created: {dest}')
else:
logging.info(f'Symlink already exists: {dest}')
else:
logging.warning(f"Source file does not exist: {source}")
except Exception as e:
logging.error(f"Failed to create symlink for {source}: {e}")
# Function to process a file
def process_file(cbz_path, stats, comics_db):
info = extract_comicinfo_from_cbz(cbz_path)
if info:
stats["files_with_comicinfo"] += 1
series = info["series"]
title = info["title"]
if series == 'N/A' or title == 'N/A':
stats["files_with_missing_info"] += 1
else:
match = re.search(r'\.T(\d+)', cbz_path)
volume = match.group(1) if match else "NoVolume"
volume = volume.zfill(2) # Format volume as two digits
comics_db[(series, volume)].append((cbz_path, title))
else:
stats["files_without_comicinfo"] += 1
process_file_without_comicinfo(cbz_path, stats, comics_db)
def process_file_without_comicinfo(cbz_path, stats, comics_db):
filename = os.path.basename(cbz_path)
directory = os.path.basename(os.path.dirname(cbz_path))
series = directory # Use the directory name as the series name
logging.info(f"ComicInfo.xml not found in {cbz_path}. Creating symlink based on directory and filename.")
dest_dir = os.path.join(destination_root, clean_name(series))
os.makedirs(dest_dir, exist_ok=True)
dest = os.path.join(dest_dir, filename)
create_symlink(cbz_path, dest)
stats["symlinks_created_filename"] += 1
def process_directories(source_directories):
stats = {
"total_files": 0,
"files_with_comicinfo": 0,
"files_without_comicinfo": 0,
"files_with_missing_info": 0,
"symlinks_created_comicinfo": 0,
"symlinks_created_filename": 0,
"symlinks_removed": 0,
"duplicates": 0
}
comics_db = defaultdict(list)
with concurrent.futures.ThreadPoolExecutor() as executor:
futures = []
for source_directory in source_directories:
for root, _, files in os.walk(source_directory):
for file in files:
if file.endswith(('.cbz', '.cbr', '.pdf')):
stats["total_files"] += 1
cbz_path = os.path.join(root, file)
futures.append(executor.submit(process_file, cbz_path, stats, comics_db))
for future in concurrent.futures.as_completed(futures):
future.result()
# Create symlinks for the best files
for (series, volume), files in comics_db.items():
best_file = sorted(files, key=lambda x: os.path.getsize(x[0]), reverse=True)[0]
source, title = best_file
dest_dir = os.path.join(destination_root, clean_name(series))
os.makedirs(dest_dir, exist_ok=True)
ext = os.path.splitext(source)[1]
if volume != "NoVolume":
dest = os.path.join(dest_dir, f"{volume}.{clean_name(title)}{ext}")
else:
dest = os.path.join(dest_dir, clean_name(os.path.basename(source)))
create_symlink(source, dest)
stats["symlinks_created_comicinfo"] += 1
stats["duplicates"] += len(files) - 1
return stats
def cleanup_symlinks(destination_root, stats):
for root, _, files in os.walk(destination_root):
for file in files:
symlink_path = os.path.join(root, file)
if os.path.islink(symlink_path):
target_path = os.readlink(symlink_path)
if not os.path.exists(target_path):
os.unlink(symlink_path)
logging.info(f"Removed obsolete symlink: {symlink_path}")
stats["symlinks_removed"] += 1
# Function to send a notification to Discord
def send_discord_notification(stats):
message = (
f"Total files processed: {stats['total_files']}\n"
f"Files with ComicInfo.xml: {stats['files_with_comicinfo']}\n"
f"Files without ComicInfo.xml: {stats['files_without_comicinfo']}\n"
f"Files with missing info: {stats['files_with_missing_info']}\n"
f"Symlinks created based on existing comicinfo: {stats['symlinks_created_comicinfo']}\n"
f"Symlinks created based on filenames (missing comicinfo): {stats['symlinks_created_filename']}\n"
f"Duplicates detected (not reflinked): {stats['duplicates']}\n"
f"Symlinks removed: {stats['symlinks_removed']}\n"
)
if len(message) > 2000:
message = message[:1997] + '...'
data = {"content": message}
response = requests.post(discord_webhook_url, json=data)
if response.status_code != 204:
logging.error(f"Failed to send Discord notification: {response.status_code} - {response.text}")
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Process CBZ files and create symlinks.')
parser.add_argument('--source', nargs='+', default=source_directories, help='List of source directories to process.')
parser.add_argument('--dest', default=destination_root, help='Destination root directory for symlinks.')
parser.add_argument('--webhook', default=discord_webhook_url, help='Discord webhook URL for notifications.')
args = parser.parse_args()
source_directories = args.source
destination_root = args.dest
discord_webhook_url = args.webhook
# Process source directories
stats = process_directories(source_directories)
# Clean up obsolete symlinks
cleanup_symlinks(destination_root, stats)
# Send statistics to Discord
send_discord_notification(stats)
logging.info("Processing complete. Statistics sent to Discord.")