diff --git a/digiarch/commands/extract/extractors/extractor_msg.py b/digiarch/commands/extract/extractors/extractor_msg.py index e0e813a7..be52d67a 100644 --- a/digiarch/commands/extract/extractors/extractor_msg.py +++ b/digiarch/commands/extract/extractors/extractor_msg.py @@ -123,7 +123,7 @@ def extract(self) -> list[tuple[Path, Path]]: if isinstance(attachment, (Message, MessageSigned)): name: str = (attachment.filename or "").strip() or (attachment.subject or "").strip() name = name.strip() or f"attachment-{n}" - name_sanitized: str = sanitize_filename(name).strip("_") or f"attachment-{n}" + name_sanitized: str = sanitize_filename(name, 20, True).strip("_") or f"attachment-{n}" attachment.export(tmp_dir / name_sanitized) files.append((name_sanitized, name)) elif isinstance(attachment.data, bytes): @@ -133,7 +133,7 @@ def extract(self) -> list[tuple[Path, Path]]: else attachment.longFilename or "" ) name = name.strip() or f"attachment-{n}" - name_sanitized: str = sanitize_filename(name).strip("_") or f"attachment-{n}" + name_sanitized: str = sanitize_filename(name, 20, True).strip("_") or f"attachment-{n}" with tmp_dir.joinpath(name_sanitized).open("wb") as fh: fh.write(attachment.data or b"") files.append((name_sanitized, name)) diff --git a/digiarch/commands/extract/extractors/extractor_patool.py b/digiarch/commands/extract/extractors/extractor_patool.py index 8a399aad..993866d5 100644 --- a/digiarch/commands/extract/extractors/extractor_patool.py +++ b/digiarch/commands/extract/extractors/extractor_patool.py @@ -5,6 +5,7 @@ from patoolib import extract_archive from patoolib.util import PatoolError +from digiarch.common import sanitize_filename from digiarch.common import sanitize_path from digiarch.common import TempDir @@ -62,6 +63,7 @@ def extract(self) -> list[tuple[Path, Path]]: for path in find_files(tmp_dir): path_sanitized: Path = extract_folder / sanitize_path(path.relative_to(tmp_dir)) + path_sanitized = path_sanitized.with_name(sanitize_filename(path_sanitized.name, 20, True)) while path_sanitized.exists(): path_sanitized = path_sanitized.with_name("_" + path_sanitized.name) path_sanitized.parent.mkdir(parents=True, exist_ok=True) diff --git a/digiarch/commands/extract/extractors/extractor_tnef.py b/digiarch/commands/extract/extractors/extractor_tnef.py index bc090e71..21617eb0 100644 --- a/digiarch/commands/extract/extractors/extractor_tnef.py +++ b/digiarch/commands/extract/extractors/extractor_tnef.py @@ -25,7 +25,7 @@ def extract(self) -> list[tuple[Path, Path]]: with TempDir(self.file.root) as tmp_dir: for attachment in tnef.attachments: name: str = attachment.long_filename() or attachment.name - path: Path = tmp_dir.joinpath(sanitize_filename(name)) + path: Path = tmp_dir.joinpath(sanitize_filename(name, 20, True)) with path.open("wb") as oh: oh.write(attachment.data) files.append((path.name, name)) diff --git a/digiarch/commands/extract/extractors/extractor_zip.py b/digiarch/commands/extract/extractors/extractor_zip.py index 4f672150..54ae6852 100644 --- a/digiarch/commands/extract/extractors/extractor_zip.py +++ b/digiarch/commands/extract/extractors/extractor_zip.py @@ -4,6 +4,7 @@ from zipfile import LargeZipFile from zipfile import ZipFile +from digiarch.common import sanitize_filename from digiarch.common import sanitize_path from digiarch.common import TempDir @@ -37,6 +38,7 @@ def extract(self) -> list[tuple[Path, Path]]: path_original: Path = Path(zf.extract(member, tmp_extract)) path_final: Path = tmp_final.joinpath(sanitize_path(path_original.relative_to(tmp_extract))) + path_final = path_final.with_name(sanitize_filename(path_final.name, 20, True)) while path_final.exists(): path_final = path_final.with_name("_" + path_final.name)