Skip to content

Commit

Permalink
commands.extract.extractors - limit sanitized filenames to 20 characters and use unique prefixes
Browse files Browse the repository at this point in the history

Fix #748
  • Loading branch information
MatteoCampinoti94 committed Dec 17, 2024
1 parent 74e2b83 commit a4a44ca
Show file tree
Hide file tree
Showing 4 changed files with 7 additions and 3 deletions.
4 changes: 2 additions & 2 deletions digiarch/commands/extract/extractors/extractor_msg.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ def extract(self) -> list[tuple[Path, Path]]:
if isinstance(attachment, (Message, MessageSigned)):
name: str = (attachment.filename or "").strip() or (attachment.subject or "").strip()
name = name.strip() or f"attachment-{n}"
- name_sanitized: str = sanitize_filename(name).strip("_") or f"attachment-{n}"
+ name_sanitized: str = sanitize_filename(name, 20, True).strip("_") or f"attachment-{n}"
attachment.export(tmp_dir / name_sanitized)
files.append((name_sanitized, name))
elif isinstance(attachment.data, bytes):
Expand All @@ -133,7 +133,7 @@ def extract(self) -> list[tuple[Path, Path]]:
else attachment.longFilename or ""
)
name = name.strip() or f"attachment-{n}"
- name_sanitized: str = sanitize_filename(name).strip("_") or f"attachment-{n}"
+ name_sanitized: str = sanitize_filename(name, 20, True).strip("_") or f"attachment-{n}"
with tmp_dir.joinpath(name_sanitized).open("wb") as fh:
fh.write(attachment.data or b"")
files.append((name_sanitized, name))
Expand Down
2 changes: 2 additions & 0 deletions digiarch/commands/extract/extractors/extractor_patool.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from patoolib import extract_archive
from patoolib.util import PatoolError

+ from digiarch.common import sanitize_filename
from digiarch.common import sanitize_path
from digiarch.common import TempDir

Expand Down Expand Up @@ -62,6 +63,7 @@ def extract(self) -> list[tuple[Path, Path]]:

for path in find_files(tmp_dir):
path_sanitized: Path = extract_folder / sanitize_path(path.relative_to(tmp_dir))
+ path_sanitized = path_sanitized.with_name(sanitize_filename(path_sanitized.name, 20, True))
while path_sanitized.exists():
path_sanitized = path_sanitized.with_name("_" + path_sanitized.name)
path_sanitized.parent.mkdir(parents=True, exist_ok=True)
Expand Down
2 changes: 1 addition & 1 deletion digiarch/commands/extract/extractors/extractor_tnef.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def extract(self) -> list[tuple[Path, Path]]:
with TempDir(self.file.root) as tmp_dir:
for attachment in tnef.attachments:
name: str = attachment.long_filename() or attachment.name
- path: Path = tmp_dir.joinpath(sanitize_filename(name))
+ path: Path = tmp_dir.joinpath(sanitize_filename(name, 20, True))
with path.open("wb") as oh:
oh.write(attachment.data)
files.append((path.name, name))
Expand Down
2 changes: 2 additions & 0 deletions digiarch/commands/extract/extractors/extractor_zip.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from zipfile import LargeZipFile
from zipfile import ZipFile

+ from digiarch.common import sanitize_filename
from digiarch.common import sanitize_path
from digiarch.common import TempDir

Expand Down Expand Up @@ -37,6 +38,7 @@ def extract(self) -> list[tuple[Path, Path]]:

path_original: Path = Path(zf.extract(member, tmp_extract))
path_final: Path = tmp_final.joinpath(sanitize_path(path_original.relative_to(tmp_extract)))
+ path_final = path_final.with_name(sanitize_filename(path_final.name, 20, True))
while path_final.exists():
path_final = path_final.with_name("_" + path_final.name)

Expand Down

0 comments on commit a4a44ca

Please sign in to comment.