From 65f1bbf4fad17f4e33da5a95b0aedae41922d23f Mon Sep 17 00:00:00 2001 From: Andy Date: Sun, 3 Mar 2024 15:39:03 +0000 Subject: [PATCH 1/6] Update magic_data.json with .ctm, .pt2, .mlt Add three new formats with multiple headers using new reverse lookup feature --- puremagic/magic_data.json | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/puremagic/magic_data.json b/puremagic/magic_data.json index b89bdb1..f22520a 100644 --- a/puremagic/magic_data.json +++ b/puremagic/magic_data.json @@ -89,6 +89,15 @@ "52494658": [ ["4647444d", 8, ".dcr", "", "Adobe Shockwave"], ["4d563933", 8, ".dir", "", "Macromedia Director file format"] + ], + "4352454d" : [ + ["444f4e4500000000", -8, ".ctm", "", "CreamTracker module"] + ], + "3c747261636b206e616d653d22" : [ + ["3c2f747261636b3e0a", -9, ".pt2", "", "PicaTune 2 module"] + ], + "3c6d6c74" : [ + ["3c2f6d6c743e0a", -7, ".mlt", "", "Shotcut project"] ] }, "footers": [ @@ -1330,6 +1339,9 @@ ["6674797068656973", 4, ".heic", "image/heic", "HEIC Image format (HEIS scalable)"], ["667479706865696d", 4, ".heic", "image/heic", "HEIC Image format (HEIM multiview)"], ["667479706865766d", 4, ".heic", "image/heic", "HEIC Animated Image format (HEIM multiview)"], - ["6674797068657673", 4, ".heic", "image/heic", "HEIC Animated Image format (HEIS scalable)"] + ["6674797068657673", 4, ".heic", "image/heic", "HEIC Animated Image format (HEIS scalable)"], + ["4352454D", 44, ".ctm", "", "CreamTracker module"], + ["3c747261636b206e616d653d22", 0, ".pt2", "", "PicaTune 2 module"], + ["3c6d6c74", 38, ".mlt", "", "Shotcut project"] ] } From a557bd58f4702a23611162c81994239eb42ad313 Mon Sep 17 00:00:00 2001 From: Andy Date: Sun, 3 Mar 2024 15:44:54 +0000 Subject: [PATCH 2/6] Update main.py with reverse lookup Allows `multi_part_header_dict` to perform a footer style lookup, this is good for files with small primary headers that score low confidence to get an aggregate score from a fixed footer (that may also be small). 
The combined score improves confidence. --- puremagic/main.py | 40 +++++++++++++++++++++++++++------------- 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/puremagic/main.py b/puremagic/main.py index cbbf71e..879ffec 100644 --- a/puremagic/main.py +++ b/puremagic/main.py @@ -151,20 +151,34 @@ def _identify_all(header: bytes, footer: bytes, ext=None) -> List[PureMagicWithC for matched in matches: if matched.byte_match in multi_part_header_dict: for magic_row in multi_part_header_dict[matched.byte_match]: - start = magic_row.offset - end = magic_row.offset + len(magic_row.byte_match) - if end > len(header): - continue - if header[start:end] == magic_row.byte_match: - new_matches.add( - PureMagic( - byte_match=header[matched.offset : end], - offset=magic_row.offset, - extension=magic_row.extension, - mime_type=magic_row.mime_type, - name=magic_row.name, + if '-' in str(magic_row.offset): + start = magic_row.offset + end = magic_row.offset + len(magic_row.byte_match) + match_area = footer[start:end] if end != 0 else footer[start:] + if match_area == magic_row.byte_match: + new_matches.add( + PureMagic( + byte_match=matched.byte_match + magic_row.byte_match, + offset=magic_row.offset, + extension=magic_row.extension, + mime_type=magic_row.mime_type, + name=magic_row.name, ) + ) + else: + start = magic_row.offset + end = magic_row.offset + len(magic_row.byte_match) + if end > len(header): + continue + if header[start:end] == magic_row.byte_match: + new_matches.add( + PureMagic( + byte_match=header[matched.offset : end], + offset=magic_row.offset, + extension=magic_row.extension, + mime_type=magic_row.mime_type, + name=magic_row.name, ) - ) + ) matches.extend(list(new_matches)) return _confidence(matches, ext) From ed74e52eb1938f40b810ef146a485eca5dc79452 Mon Sep 17 00:00:00 2001 From: Andy Date: Thu, 7 Mar 2024 10:21:31 +0000 Subject: [PATCH 3/6] Update main.py reformat to hopefully pass build tests I think I had a poorly placed/spaced ) that may have been
causing the autobuild to fail; hopefully this will resolve it. --- puremagic/main.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/puremagic/main.py b/puremagic/main.py index 879ffec..7cb7a8a 100644 --- a/puremagic/main.py +++ b/puremagic/main.py @@ -162,7 +162,8 @@ def _identify_all(header: bytes, footer: bytes, ext=None) -> List[PureMagicWithC offset=magic_row.offset, extension=magic_row.extension, mime_type=magic_row.mime_type, - name=magic_row.name, ) + name=magic_row.name, + ) ) else: start = magic_row.offset From 284daa4f3ea5bf97151b4f5ae8f026482baaedec Mon Sep 17 00:00:00 2001 From: Andy Date: Thu, 7 Mar 2024 10:29:17 +0000 Subject: [PATCH 4/6] Update main.py layout fixing using Black Second attempt to correct, ran it through Black so hopefully this will now clear the autobuild issues. --- puremagic/main.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/puremagic/main.py b/puremagic/main.py index 7cb7a8a..1675c47 100644 --- a/puremagic/main.py +++ b/puremagic/main.py @@ -151,21 +151,21 @@ def _identify_all(header: bytes, footer: bytes, ext=None) -> List[PureMagicWithC for matched in matches: if matched.byte_match in multi_part_header_dict: for magic_row in multi_part_header_dict[matched.byte_match]: - if '-' in str(magic_row.offset): + if "-" in str(magic_row.offset): start = magic_row.offset end = magic_row.offset + len(magic_row.byte_match) match_area = footer[start:end] if end != 0 else footer[start:] if match_area == magic_row.byte_match: new_matches.add( PureMagic( - byte_match=matched.byte_match + magic_row.byte_match, + byte_match=matched.byte_match + magic_row.byte_match, offset=magic_row.offset, extension=magic_row.extension, mime_type=magic_row.mime_type, name=magic_row.name, ) - ) - else: + ) + else: start = magic_row.offset end = magic_row.offset + len(magic_row.byte_match) if end > len(header): @@ -178,8 +178,8 @@ def _identify_all(header: bytes, footer: bytes, ext=None) -> List[PureMagicWithC
extension=magic_row.extension, mime_type=magic_row.mime_type, name=magic_row.name, + ) ) - ) matches.extend(list(new_matches)) return _confidence(matches, ext) From 1a987174813d08e0763f4015b3c9ce6efd391fac Mon Sep 17 00:00:00 2001 From: Andy Date: Tue, 12 Mar 2024 10:10:41 +0000 Subject: [PATCH 5/6] Update main.py pythonic fixes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pythonic fixes suggested by @cdgriffith in PR comments. Checking in Black Playground so should merge first time. 🤞 --- puremagic/main.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/puremagic/main.py b/puremagic/main.py index 1675c47..ecfb3bc 100644 --- a/puremagic/main.py +++ b/puremagic/main.py @@ -151,9 +151,7 @@ def _identify_all(header: bytes, footer: bytes, ext=None) -> List[PureMagicWithC for matched in matches: if matched.byte_match in multi_part_header_dict: for magic_row in multi_part_header_dict[matched.byte_match]: - if "-" in str(magic_row.offset): - start = magic_row.offset - end = magic_row.offset + len(magic_row.byte_match) + if magic_row.offset < 0: match_area = footer[start:end] if end != 0 else footer[start:] if match_area == magic_row.byte_match: new_matches.add( @@ -166,8 +164,6 @@ def _identify_all(header: bytes, footer: bytes, ext=None) -> List[PureMagicWithC ) ) else: - start = magic_row.offset - end = magic_row.offset + len(magic_row.byte_match) if end > len(header): continue if header[start:end] == magic_row.byte_match: From 463e0133e7d5d88f8bd6db524f914209cb609ff2 Mon Sep 17 00:00:00 2001 From: Andy Date: Tue, 12 Mar 2024 10:13:14 +0000 Subject: [PATCH 6/6] Update main.py again Honestly need more coffee --- puremagic/main.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/puremagic/main.py b/puremagic/main.py index ecfb3bc..c98e2ff 100644 --- a/puremagic/main.py +++ b/puremagic/main.py @@ -151,7 +151,9 @@ def _identify_all(header: bytes, footer: bytes, ext=None) -> 
List[PureMagicWithC for matched in matches: if matched.byte_match in multi_part_header_dict: for magic_row in multi_part_header_dict[matched.byte_match]: - if magic_row.offset < 0: + start = magic_row.offset + end = magic_row.offset + len(magic_row.byte_match) + if magic_row.offset < 0: match_area = footer[start:end] if end != 0 else footer[start:] if match_area == magic_row.byte_match: new_matches.add(