Skip to content

Commit

Permalink
Use the encoding key as the encoding if a torrent file has that field
Browse files Browse the repository at this point in the history
  • Loading branch information
stevenxxiu committed Jan 5, 2020
1 parent 1e680d0 commit 34939b7
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 14 deletions.
4 changes: 2 additions & 2 deletions lib/cfv/BitTorrent/bencode.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def decode_int(x, f):
return n, newf + 1


def decode_string(x, f, try_decode_utf8=True, force_decode_utf8=False):
def decode_string(x, f, try_decode_utf8=False, force_decode_utf8=False):
# type: (bytes, int, bool, bool) -> Tuple[bytes, int]
"""Decode torrent bencoded 'string' in x starting at f.
Expand Down Expand Up @@ -141,7 +141,7 @@ def decode_dict(x, f, force_sort=True):
r, f = OrderedDict(), f + 1

while x[f:f + 1] != b'e':
k, f = decode_string(x, f, force_decode_utf8=True)
k, f = decode_string(x, f, try_decode_utf8=True, force_decode_utf8=True)
r[k], f = decode_func[x[f:f + 1]](x, f)

if force_sort:
Expand Down
34 changes: 22 additions & 12 deletions lib/cfv/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -1116,19 +1116,38 @@ class Torrent(ChksumType):
description = 'BitTorrent metainfo'
descinfo = 'name,size,SHA1(piecewise)'

@staticmethod
def _decode_str(s, encoding):
    """Decode a torrent text field using the torrent's declared encoding.

    If ``s`` is already a ``str`` it is first re-encoded to UTF-8 bytes, so
    that the declared encoding is always applied to a byte string.

    Args:
        s: The field value, either ``bytes`` or ``str``.
        encoding: Codec name to try first.

    Returns:
        ``s`` decoded with ``encoding``; if the bytes are not valid in that
        encoding, ``s`` decoded as UTF-8 instead.

    Raises:
        ValueError: If ``encoding`` does not name a known codec.
        UnicodeDecodeError: If the bytes are valid in neither ``encoding``
            nor UTF-8 (this is a ``ValueError`` subclass).
    """
    if isinstance(s, str):
        s = s.encode('utf-8')
    try:
        return s.decode(encoding)
    except UnicodeDecodeError:
        # The declared encoding does not fit the data; fall back to UTF-8.
        return s.decode('utf-8')
    except LookupError as e:
        # An unknown codec name raises LookupError, which is not a ValueError.
        # Re-raise it as ValueError so that callers handling malformed input
        # via ``except ValueError`` also see a bogus encoding name.
        raise ValueError(str(e))

@staticmethod
def auto_chksumfile_match(file):
    """Return whether ``file`` looks like a BitTorrent metainfo file.

    The check is that the data starts with ``b'd'`` and that the bytes
    returned by ``file.peek(4096)`` contain ``b'8:announce'``.

    Args:
        file: An object with a ``peek(n)`` method returning bytes.

    Returns:
        ``True`` if both checks pass, ``False`` otherwise.
    """
    # Slice to one byte: a peek() implementation may return more bytes than
    # requested (io.BufferedReader.peek documents this), which would make a
    # direct comparison with b'd' fail.
    if file.peek(1)[:1] != b'd':
        return False
    return b'8:announce' in file.peek(4096)

def do_test_chksumfile(self, file):
try:
metainfo = bencode.bdecode(file.read())
# Decode using encoding
encoding = metainfo.get('encoding', b'utf-8').decode('utf-8')
metainfo['announce'] = self._decode_str(metainfo['announce'], encoding)
if 'comment' in metainfo:
metainfo['comment'] = self._decode_str(metainfo['comment'], encoding)
metainfo['info']['name'] = self._decode_str(metainfo['info']['name'], encoding)
for file_info in metainfo.get('info', {}).get('files', []):
file_info['path'] = [self._decode_str(part, encoding) for part in file_info['path']]

btformats.check_message(metainfo)
except ValueError as e:
raise EnvironmentError(str(e) or 'invalid or corrupt torrent')

encoding = metainfo.get('encoding')

comments = []
if 'creation date' in metainfo:
try:
Expand All @@ -1137,22 +1156,13 @@ def do_test_chksumfile(self, file):
comments.append('created ' + repr(metainfo['creation date']))
if 'comment' in metainfo:
try:
comments.append(cfdecode(metainfo['comment'], encoding))
comments.append(metainfo['comment'])
except UnicodeError:
pass
self.do_test_chksumfile_print_testingline(file, ', '.join(comments))

def init_file(filenameparts, ftotpos, filesize):
done = 0
try:
filenameparts = [cffndecode(p.encode(), encoding) for p in filenameparts]
except LookupError as e: # lookup error is raised when specified encoding isn't found.
raise EnvironmentError(str(e))
except (UnicodeError, FilenameError) as e:
stats.cferror += 1
view.ev_test_cf_filenameencodingerror(file.name, repr(filenameparts), e)
done = 1
l_filename = filename = None
if not done:
filename = osutil.path_join(*filenameparts)
if not config.docrcchecks: # if we aren't testing checksums, just use the standard test_file function, so that -s and such will work.
Expand Down

0 comments on commit 34939b7

Please sign in to comment.