Skip to content

Commit

Permalink
Small performance improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
cclauss committed May 16, 2024
1 parent 88bc58f commit 72a4464
Show file tree
Hide file tree
Showing 6 changed files with 15 additions and 20 deletions.
1 change: 0 additions & 1 deletion puremagic/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from puremagic.main import __version__, __author__
from puremagic.main import *
1 change: 0 additions & 1 deletion puremagic/__main__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from puremagic.main import command_line_entry

command_line_entry()
22 changes: 11 additions & 11 deletions puremagic/main.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
puremagic is a pure python module that will identify a file based off its
magic numbers. It is designed to be minimalistic and inherently cross-platform
Expand Down Expand Up @@ -118,15 +117,16 @@ def _confidence(matches, ext=None) -> List[PureMagicWithConfidence]:
"""Rough confidence based on string length and file extension"""
results = []
for match in matches:
con = 0.8 if len(match.byte_match) >= 9 else float("0.{0}".format(len(match.byte_match)))
con = 0.8 if len(match.byte_match) >= 9 else float(f"0.{len(match.byte_match)}")
if con >= 0.1 and ext and ext == match.extension:
con = 0.9
results.append(PureMagicWithConfidence(confidence=con, **match._asdict()))

if not results and ext:
for magic_row in extension_only_array:
if ext == magic_row.extension:
results.append(PureMagicWithConfidence(confidence=0.1, **magic_row._asdict()))
results = [
PureMagicWithConfidence(confidence=0.1, **magic_row._asdict())
for magic_row in extension_only_array if ext == magic_row.extension
]

if not results:
raise PureError("Could not identify file")
Expand All @@ -139,7 +139,7 @@ def _identify_all(header: bytes, footer: bytes, ext=None) -> List[PureMagicWithC

# Capture the length of the data
# That way we do not try to identify bytes that don't exist
matches = list()
matches = []
for magic_row in magic_header_array:
start = magic_row.offset
end = magic_row.offset + len(magic_row.byte_match)
Expand Down Expand Up @@ -208,7 +208,7 @@ def _file_details(filename: Union[os.PathLike, str]) -> Tuple[bytes, bytes]:
head = fin.read(max_head)
try:
fin.seek(-max_foot, os.SEEK_END)
except IOError:
except OSError:
fin.seek(0)
foot = fin.read()
return head, foot
Expand Down Expand Up @@ -240,7 +240,7 @@ def ext_from_filename(filename: Union[os.PathLike, str]) -> str:
base, ext = str(filename).lower().rsplit(".", 1)
except ValueError:
return ""
ext = ".{0}".format(ext)
ext = f".{ext}"
all_exts = [x.extension for x in chain(magic_header_array, magic_footer_array)]

if base[-4:].startswith("."):
Expand Down Expand Up @@ -379,12 +379,12 @@ def command_line_entry(*args):

for fn in args.files:
if not os.path.exists(fn):
print("File '{0}' does not exist!".format(fn))
print(f"File '{fn}' does not exist!")
continue
try:
print("'{0}' : {1}".format(fn, from_file(fn, args.mime)))
print(f"'{fn}' : {from_file(fn, args.mime)}")
except PureError:
print("'{0}' : could not be Identified".format(fn))
print(f"'{fn}' : could not be Identified")


if __name__ == "__main__":
Expand Down
3 changes: 1 addition & 2 deletions scripts/parse_ftk_kessler_sigs.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
This is a very ugly helper script to keep up to date with file types in
Gary C. Kessler's FTK_sigs_GCK archive.
Expand Down Expand Up @@ -60,6 +59,6 @@
elif sig["SIG"] not in known_sigs:
for ext in sig["EXT_NAME"]:
if ext != "(none)":
print("\t\t{},".format(json.dumps([sig["SIG"], offset, ".{}".format(ext), "", sig["DESCRIPTION"]])))
print("\t\t{},".format(json.dumps([sig["SIG"], offset, f".{ext}", "", sig["DESCRIPTION"]])))
else:
print("\t\t{},".format(json.dumps([sig["SIG"], offset, "", "", sig["DESCRIPTION"]])))
5 changes: 2 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from setuptools import setup
import os
Expand All @@ -8,12 +7,12 @@

root = os.path.abspath(os.path.dirname(__file__))

with open(os.path.join(root, "puremagic", "main.py"), "r") as reuse_file:
with open(os.path.join(root, "puremagic", "main.py")) as reuse_file:
reuse_content = reuse_file.read()

attrs = dict(re.findall(r"__([a-z]+)__ *= *['\"](.+)['\"]", reuse_content))

with open("README.rst", "r") as readme_file:
with open("README.rst") as readme_file:
long_description = readme_file.read()

setup(
Expand Down
3 changes: 1 addition & 2 deletions test/test_common_extensions.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# -*- coding: utf-8 -*-
import unittest
from tempfile import NamedTemporaryFile
import os
Expand Down Expand Up @@ -55,7 +54,7 @@ def group_test(self, directory):
if ext_failures:
raise AssertionError(
"The following files did not have the expected extensions: {}".format(
", ".join(['"{}" expected "{}"'.format(item, ext) for item, ext in ext_failures])
", ".join([f'"{item}" expected "{ext}"' for item, ext in ext_failures])
)
)
if mime_failures:
Expand Down

0 comments on commit 72a4464

Please sign in to comment.