# vgmdbrip.py
# Forked from arbingordon/vgmdbrip.
from sys import argv
import os
import json
import hashlib
import getpass
import pickle
import requests
from bs4 import BeautifulSoup
# Set these variables to your desired preferences:

# Append the media format and catalog number of the chosen search result to
# the output folder name.
add_info_to_output_folder_name = True
# Prefix each saved image with its zero-padded position in the gallery.
add_order_number_to_filename = True
# Append the base name of the downloaded file, in square brackets, to each
# saved image's name.
add_source_to_filename = True
# Replace characters that are illegal in Windows file names with look-alike
# characters instead of removing them.
allow_approximation_of_invalid_characters = True
# When the query is an existing file or folder, work inside that folder and
# use the folder's name as the search query.
allow_input_folder_detection = True
# Prompt for a query when the script is started without arguments.
allow_no_arguments = True
# Search with free terms and offer a result picker; when disabled the query
# is searched as an exact (quoted) phrase and the script exits if VGMdb
# answers with a result list.
allow_search_terms = True
# Also save the first gallery image as "folder.jpg" in the working directory.
create_folder_image = True
# Change into the script's own directory before downloading.
default_download_to_script_directory = True
# Treat every command-line argument as its own query instead of joining them
# into one query.
process_each_argument_separately = True
# Print the starting query before the first request is made.
show_initial_query = True
# Number of spaces between the longest cell of a column and the next column
# in the search result table.
output_tab_padding = 4

if allow_search_terms:
    # Regular expressions are only needed to parse the search result page.
    import re

# Directory that contains this script. os.path handles both "/" and "\\"
# separators; abspath keeps the result non-empty when the script is started
# by bare name (splitting on "\\" alone yields "" there).
scriptdir = os.path.dirname(os.path.abspath(argv[0]))
# Pickled requests session, reused so that later runs skip the login prompt.
config = os.path.join(scriptdir, 'vgmdbrip.pkl')
session = requests.Session()
def Soup(data):
    """Return *data* parsed into a BeautifulSoup tree by "html.parser"."""
    parser_name = "html.parser"
    return BeautifulSoup(data, parser_name)
def login():
    """Log the shared ``session`` in to the VGMdb forums.

    When the pickled session file ``config`` exists it replaces the global
    ``session`` and no credentials are requested. Otherwise the user is
    prompted for a username and password until the forum's answer starts
    with neither 'You' nor 'Wrong'.

    Raises:
        SystemExit: when the forum answers with a message starting with
            'Wrong', when the attempt counter in a 'You...' message reads
            '5', or when no message panel can be found in the response.
    """
    global session
    if os.path.isfile(config):
        # NOTE(security): unpickling executes code embedded in the file, so
        # the config file must only ever be one written by this script.
        with open(config, "rb") as cached_session:
            session = pickle.load(cached_session)
        return

    base_url = 'https://vgmdb.net/forums/'
    while True:
        username = input('VGMdb username:\t')
        password = getpass.getpass('VGMdb password:\t')
        password_md5 = hashlib.md5(password.encode()).hexdigest()
        response = session.post(base_url + 'login.php?do=login', {
            'vb_login_username': username,
            'vb_login_password': password,
            'vb_login_md5password': password_md5,
            'vb_login_md5password_utf': password_md5,
            'cookieuser': 1,
            'do': 'login',
            's': '',
            'securitytoken': 'guest'
        })
        table = Soup(response.content).find('table', class_='tborder', width="70%")
        panel = table.find('div', class_='panel') if table is not None else None
        if panel is None:
            # Without the panel there is no way to tell success from failure.
            raise SystemExit('Could not find the login result message in the response from VGMdb.')
        message = panel.text.strip()
        print(message)

        if message.startswith('You'):
            # Presumably index 223 holds the number of failed attempts in the
            # forum's failure text and '5' means none are left -- confirm
            # against a live response. The length guard keeps a shorter
            # message from raising IndexError.
            if len(message) > 223 and message[223] == '5':
                raise SystemExit(1)
            continue
        if message.startswith('Wrong'):
            raise SystemExit(1)
        break
def print_aligned_columns(arr, padding=None):
    """Print tab-separated rows as left-aligned, space-padded columns.

    Args:
        arr: Iterable of strings; each string is one row whose cells are
            separated by tab characters. Rows may have differing numbers of
            cells.
        padding: Number of spaces between the longest cell of a column and
            the start of the next column. Defaults to the module-level
            ``output_tab_padding`` setting.
    """
    if padding is None:
        padding = output_tab_padding

    rows = [row.split('\t') for row in arr]

    # Track the width per column index rather than zipping the rows together:
    # zip() stops at the shortest row and would drop the extra cells of
    # longer rows.
    widths = []
    for cells in rows:
        for position, cell in enumerate(cells):
            # One space comes from the join below, hence "padding - 1".
            wanted = len(cell) + padding - 1
            if position == len(widths):
                widths.append(wanted)
            elif wanted > widths[position]:
                widths[position] = wanted

    for cells in rows:
        print(' '.join(cell.ljust(width) for cell, width in zip(cells, widths)))
def remove(instring, chars):
    """Return *instring* with every occurrence of each item of *chars* removed.

    Args:
        instring: The text to clean.
        chars: Iterable of substrings to delete; passing a string deletes
            each of its characters.
    """
    for unwanted in chars:
        instring = instring.replace(unwanted, "")
    return instring
def ensure_dir(f):
    """Create the directory part of path *f*, including missing parents.

    Nothing happens when *f* has no directory component or when the
    directory already exists.
    """
    directory = os.path.dirname(f)
    # An empty dirname means "current directory"; os.makedirs("") would raise.
    if directory:
        # exist_ok avoids the race between checking for and creating the
        # directory.
        os.makedirs(directory, exist_ok=True)
# The argument count is deliberately not validated here: the dispatch code at
# the bottom of the script decides between prompting and printing the usage.
login()
soup = ""
# An empty scriptdir already means "the current directory", and os.chdir("")
# would raise FileNotFoundError, so only change directory for a real path.
if default_download_to_script_directory and scriptdir:
    os.chdir(scriptdir)
def download_vgmdb_art(query):
    """Download all cover-gallery images of one VGMdb album.

    *query* may be a numeric album ID, an album URL, search terms, or (when
    ``allow_input_folder_detection`` is enabled) the path of an existing
    file or folder. For a path, the working directory becomes that folder
    and the folder name is used as the search query. When VGMdb answers with
    a result list, the user is asked to pick a match or type a new query.

    Images are written to a "Scans (VGMdb)..." folder below the working
    directory; the first image is optionally also saved as "folder.jpg".
    Finally the session is pickled to ``config``.

    Args:
        query (str): Album ID, album URL, search terms or an existing path.

    Raises:
        SystemExit: when ``allow_search_terms`` is disabled and the search
            does not resolve directly to an album page.
    """
    ids = []
    catalogs = []
    media_formats = []
    choice_index = 0

    # If allow_input_folder_detection is enabled and query is a file or folder that exists:
    if allow_input_folder_detection and os.path.exists(query):
        # Normalise first: a bare file name has an empty dirname and a path
        # with a trailing separator has an empty basename.
        target_dir = os.path.abspath(query)
        if os.path.isfile(target_dir):
            target_dir = os.path.dirname(target_dir)
        os.chdir(target_dir)
        # Get the folder name.
        query = os.path.basename(target_dir)
        # If the folder name contains spaces:
        if " " in query:
            # Remove hyphens so that terms aren't excluded.
            query = query.replace("-", "")
    # NOTE(review): the source's indentation was lost; this is assumed to
    # apply to every query, not only to detected folders -- confirm.
    if show_initial_query:
        print('Query: ' + query)

    while True:
        query = query.replace("https://vgmdb.net/album/", "")
        if query.isdigit():
            soup = Soup(session.get("https://vgmdb.net/album/" + query).content)
            break

        # Let requests build the query string so that characters such as
        # "&", "#" or "+" in the search terms are percent-encoded.
        search_term = query if allow_search_terms else '"' + query + '"'
        soup = Soup(session.get("https://vgmdb.net/search", params={"q": search_term}).content)
        # A title that does not start with "Search" means the search went
        # straight to an album page.
        if soup.title.text[:6] != "Search":
            break
        if not allow_search_terms:
            print("stuck at search results")
            raise SystemExit(1)

        # Get all matches from the result page, one list per column.
        page_html = str(soup)
        ids = re.findall(r'href="http://vgmdb.net/album/(\d+)"\s+title="[^"]+"', page_html)
        catalogs = re.findall(r'span class="catalog[^"]*">([^<]+)</span>', page_html)
        album_titles = re.findall(r'href="http://vgmdb.net/album/\d+"\s+title="([^"]+)"', page_html)
        release_dates = re.findall(r'"View albums released on ([^"]+)', page_html)
        release_dates += re.findall(r'text-align: right[^>]+>(\d\d\d\d)<', page_html)
        media_formats = re.findall(r'text-align: right[^>]+>([^<>\r\n]+[^<>\r\n\d])<', page_html)

        if not ids:
            query = input("Enter a different query: ")
            continue

        print("Here are the search results:")
        # A column is only shown when it was parsed for every result;
        # otherwise its values cannot be matched to the right row.
        columns = [
            column
            for column in (catalogs, album_titles, release_dates, media_formats)
            if len(column) == len(ids)
        ]
        search_results = []
        for idx in range(len(ids)):
            cells = [f"{idx + 1}."]
            cells.extend(column[idx] for column in columns)
            search_results.append("\t".join(cells))
        print_aligned_columns(search_results)

        while True:
            query = input("Enter the number of the match you want or a different query: ")
            if not query.isdigit():
                print(f"Input is not an integer. Using new query: {query}")
                # Forget this result list: if the new query resolves directly
                # to an album, the old catalog/format must not name the folder.
                ids = []
                choice_index = 0
                break
            # Adjust for 0-based indexing.
            choice_index = int(query) - 1
            if 0 <= choice_index < len(ids):
                query = ids[choice_index]
                break
            print("Invalid number. Please enter a valid match number.")

    print('Title: ' + soup.title.text)

    folder = "Scans (VGMdb)"
    if add_info_to_output_folder_name and ids:
        if len(media_formats) == len(ids):
            folder += f" ({media_formats[choice_index]})"
        if len(catalogs) == len(ids) and catalogs[choice_index] != "N/A":
            folder += f" [{catalogs[choice_index]}]"
    folder = get_valid_windows_name(folder, allow_approximation_of_invalid_characters)

    gallery = soup.find("div", attrs={"class": "covertab", "id": "cover_gallery"})
    if gallery is None:
        # The page has no cover gallery at all.
        print("No cover gallery found")
        scans = []
    else:
        scans = gallery.find_all("a", attrs={"class": "highslide"})

    for idx, scan in enumerate(scans, start=1):
        url = scan["href"]
        title = get_valid_windows_name(scan.text, allow_approximation_of_invalid_characters)
        image = session.get(url).content
        ensure_dir(folder + os.sep)

        # splitext keeps extensions of any length (".jpeg", ".webp") intact,
        # unlike taking the last four characters of the URL.
        source_filename, extension = os.path.splitext(os.path.basename(url))
        filename = ""
        if add_order_number_to_filename:
            filename += f"{str(idx).zfill(2)} "
        filename += title
        if add_source_to_filename:
            filename += f" [{source_filename}]"
        filename += extension

        if idx == 1 and create_folder_image:
            # Use .jpg regardless of file extension as cheat to ensure Windows shows it as folder thumbnail.
            folder_image_filename = "folder.jpg"
            if not os.path.exists(folder_image_filename):
                with open(folder_image_filename, "wb") as f:
                    f.write(image)
                print(folder_image_filename + " downloaded")

        with open(os.path.join(folder, filename), "wb") as f:
            f.write(image)
        print(title + " downloaded")

    # Persist the session so that the next run can skip the login prompt.
    # NOTE(review): the source's indentation was lost; this dump is assumed
    # to belong to the function -- confirm.
    with open(config, "wb") as session_file:
        pickle.dump(session, session_file)
def get_valid_windows_name(filename, approximation):
    """
    Replaces forbidden characters in filename.

    Leading and trailing whitespace is stripped first. The forbidden
    characters are " * / : < > ? \\ and |.

    Args:
        filename (str): The input text to process.
        approximation (bool): Whether to replace illegal characters using unusual characters that approximate them.
            When False the illegal characters are removed instead.

    Returns:
        str: The processed text with forbidden characters replaced.
    """
    # Strip leading and trailing whitespace from the filename.
    filename = filename.strip()

    if not approximation:
        # Third maketrans argument: characters to delete.
        return filename.translate(str.maketrans("", "", '"*/:<>?\\|'))

    # Look-alike replacements, written as escapes so that a Unicode
    # normalisation of this file cannot fold them back into the forbidden
    # ASCII characters.
    replacements = {
        '"': '\u201c',   # left double quotation mark
        '>': '\uff1e',   # fullwidth greater-than sign
        '<': '\uff1c',   # fullwidth less-than sign
        '?': '\uff1f',   # fullwidth question mark
        ':': '\uff1a',   # fullwidth colon
        '*': '\u2731',   # heavy asterisk
        '|': '\u2502',   # box drawings light vertical
        '\\': '\uff3c',  # fullwidth reverse solidus
        '/': '\uff0f',   # fullwidth solidus
    }
    # Replace all bad characters with their equivalent replacements.
    return filename.translate(str.maketrans(replacements))
# Dispatch: command-line arguments win; without any, either prompt for a
# query or print the usage line and exit with status 1.
cli_queries = argv[1:]
if cli_queries:
    if not process_each_argument_separately:
        # Treat all arguments together as one space-separated query.
        cli_queries = [" ".join(cli_queries)]
    for cli_query in cli_queries:
        download_vgmdb_art(cli_query)
elif allow_no_arguments:
    download_vgmdb_art(input("Enter the VGMdb URL ID or search query for which you want to download album art: "))
else:
    print("usage: " + argv[0] + " vgmdb_album_id")
    raise SystemExit(1)