Skip to content

Commit

Permalink
import-iae: Adapt to use the archive from RIAE
Browse files Browse the repository at this point in the history
  • Loading branch information
rsebille committed May 21, 2024
1 parent 33f1e22 commit 074c4c3
Show file tree
Hide file tree
Showing 8 changed files with 34 additions and 1,702 deletions.
10 changes: 8 additions & 2 deletions itou/companies/management/commands/_import_siae/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import gzip
import os
import shutil
from pathlib import Path

from py7zr import unpack_7zarchive
import pyzipper


def gunzip(archivepath, outdir):
Expand All @@ -15,5 +16,10 @@ def gunzip(archivepath, outdir):
raise shutil.ReadError from e


def unpack_riae_zip_aes_encrypted(path, directory, **kwargs):
    """Extract an AES-encrypted Zip archive into ``directory``.

    Meant to be used as an unpack callable for ``shutil.register_unpack_format()``.
    The password is read from the ``ASP_RIAE_UNZIP_PASSWORD`` environment variable.

    Raises:
        shutil.ReadError: ``path`` is not a valid Zip file. ``shutil.unpack_archive()``
            callers that probe several formats rely on this exception type to move
            on to the next candidate.
        KeyError: the ``ASP_RIAE_UNZIP_PASSWORD`` environment variable is not set.
    """
    try:
        with pyzipper.AESZipFile(path) as zf:
            zf.extractall(directory, pwd=os.environ["ASP_RIAE_UNZIP_PASSWORD"].encode())
    except pyzipper.BadZipFile as e:
        # Report an unreadable archive the way shutil's own unpackers do, instead of
        # leaking a pyzipper-specific exception through shutil.unpack_archive().
        raise shutil.ReadError(f"{path} is not a valid Zip file") from e


shutil.register_unpack_format("gz", [".gz"], gunzip)
shutil.register_unpack_format("7zip", [".7z"], unpack_7zarchive)
shutil.register_unpack_format("zip-riae", [".riae"], unpack_riae_zip_aes_encrypted)
11 changes: 3 additions & 8 deletions itou/companies/management/commands/_import_siae/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
import pandas as pd
from django.conf import settings
from django.utils import timezone
from py7zr.exceptions import Bad7zFile

from itou.common_apps.address.models import AddressMixin
from itou.companies.models import Company
Expand Down Expand Up @@ -52,7 +51,7 @@ def get_filename(filename_prefix, filename_extension, description=None):
description = filename_prefix

filenames = []
extensions = (filename_extension, f"{filename_extension}.gz")
extensions = (filename_extension, f"{filename_extension}.gz", f"{filename_extension}.riae")
for filename in os.listdir(settings.ASP_FLUX_IAE_DIR):
if filename.startswith(f"{filename_prefix}_") and filename.endswith(extensions):
filenames.append(filename)
Expand Down Expand Up @@ -333,20 +332,16 @@ def get_fluxiae_df(
# the duration and frequency of the developer's headaches.

with tempfile.TemporaryDirectory() as d:
expected_exceptions = (
shutil.ReadError,
Bad7zFile, # Remove this exception if https://github.com/miurahr/py7zr/pull/583 is merged.
)
try:
# Use the file extension as hint for the unpack algorithm.
shutil.unpack_archive(filename, d)
except expected_exceptions:
except shutil.ReadError:
# The file extension does not represent the compression used, try known unpack formats.
for format, _ext, _desc in shutil.get_unpack_formats():
try:
shutil.unpack_archive(filename, d, format=format)
break
except expected_exceptions:
except shutil.ReadError:
pass
else:
raise ValueError(f"Unable to extract “{filename}”.")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def populate_metabase_fluxiae(self):
self.populate_fluxiae_view(vue_name="fluxIAE_AnnexeFinanciere")
self.populate_fluxiae_view(vue_name="fluxIAE_AnnexeFinanciereACI")
self.populate_fluxiae_view(vue_name="fluxIAE_Convention")
self.populate_fluxiae_view(vue_name="fluxIAE_ContratMission", skip_first_row=False)
self.populate_fluxiae_view(vue_name="fluxIAE_ContratMission")
self.populate_fluxiae_view(vue_name="fluxIAE_Encadrement")
self.populate_fluxiae_view(vue_name="fluxIAE_EtatMensuelAgregat")
self.populate_fluxiae_view(vue_name="fluxIAE_EtatMensuelIndiv")
Expand All @@ -103,7 +103,7 @@ def populate_metabase_fluxiae(self):
self.populate_fluxiae_view(vue_name="fluxIAE_Missions")
self.populate_fluxiae_view(vue_name="fluxIAE_MissionsEtatMensuelIndiv")
self.populate_fluxiae_view(vue_name="fluxIAE_PMSMP")
self.populate_fluxiae_view(vue_name="fluxIAE_Salarie", skip_first_row=False)
self.populate_fluxiae_view(vue_name="fluxIAE_Salarie")
self.populate_fluxiae_view(vue_name="fluxIAE_Structure")

build_dbt_weekly()
Expand Down
2 changes: 1 addition & 1 deletion requirements/base.in
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ httpx # https://github.com/encode/httpx/

# SFTP file transfer for ASP
paramiko # https://github.com/paramiko/paramiko
py7zr # https://github.com/miurahr/py7zr
pyzipper # https://github.com/danifus/pyzipper

# Stream XLSX files
xlsx_streaming # https://github.com/Polyconseil/xlsx_streaming
Expand Down
556 changes: 5 additions & 551 deletions requirements/base.txt

Large diffs are not rendered by default.

572 changes: 5 additions & 567 deletions requirements/dev.txt

Large diffs are not rendered by default.

572 changes: 5 additions & 567 deletions requirements/test.txt

Large diffs are not rendered by default.

9 changes: 5 additions & 4 deletions scripts/import-iae.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ set +x
echo "Running the ASP import script for IAE"
cd "$APP_HOME" || exit

FLUX_IAE_FILE_GLOB='fluxIAE_*.zip'
FLUX_IAE_FILE_GLOB='fluxIAE_*.tar.gz'

FLUX_IAE_FILE=$(find dgefp_shared_bucket/ -name "$FLUX_IAE_FILE_GLOB" -type f -mtime -5)
FLUX_IAE_FILE=$(find asp_riae_shared_bucket/ -name "$FLUX_IAE_FILE_GLOB" -type f -mtime -5)
if [[ ! -f "$FLUX_IAE_FILE" ]]; then
echo "Missing the flux IAE file."
exit 0
Expand All @@ -21,7 +21,8 @@ mkdir -p "$FLUX_IAE_DIR"
rm -rf "$FLUX_IAE_DIR"*

# Unzip files
unzip -P "$ASP_UNZIP_PASSWORD" "$FLUX_IAE_FILE" -d "$FLUX_IAE_DIR"
unzip "$FLUX_IAE_FILE" -d "$FLUX_IAE_DIR" # tar.gz extension but it's a Zip file...
rename --last --no-overwrite '.gz' '.riae' "$FLUX_IAE_DIR"/*.csv.gz # Use a custom extension to identify them for shutil.unpack_archive()

# Create the logs directory
OUTPUT_PATH="shared_bucket/imports-asp"
Expand All @@ -37,4 +38,4 @@ time ./manage.py import_siae --wet-run --verbosity=2 |& tee -a "$OUTPUT_PATH/imp
rm -rf "$FLUX_IAE_DIR"

# Remove files older than 3 weeks
find dgefp_shared_bucket/ -name "$FLUX_IAE_FILE_GLOB" -type f -mtime +20 -delete
find asp_riae_shared_bucket/ -name "$FLUX_IAE_FILE_GLOB" -type f -mtime +20 -delete

0 comments on commit 074c4c3

Please sign in to comment.