Skip to content

Commit

Permalink
Merge pull request #11531 from archesproject/jtw/bump-openpyxl
Browse files: browse the repository at this point in the history
Resolve DeprecationWarning and ResourceWarning in excel importers
  • Loading branch information
njkim authored Nov 12, 2024
2 parents b05f1a3 + 753f921 commit e3aeed2
Show file tree
Hide file tree
Showing 10 changed files with 19 additions and 11 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ Installation is fully documented in the official documentation, [arches.readthed
```
then
```
arches-project create myproject
arches-admin startproject myproject
```
enter the new `myproject` directory
```
Expand Down
7 changes: 4 additions & 3 deletions arches/app/etl_modules/base_import_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,13 +354,14 @@ def read(self, request=None, source=None):
if file.split(".")[-1] == "xlsx":
try:
uploaded_file_path = os.path.join(self.temp_dir, file)
workbook = load_workbook(
filename=default_storage.open(uploaded_file_path)
)
opened_file = default_storage.open(uploaded_file_path)
workbook = load_workbook(filename=opened_file, read_only=True)
self.validate_uploaded_file(workbook)
has_valid_excel_file = True
except:
pass
else:
opened_file.close()
if not has_valid_excel_file:
title = _("Invalid Uploaded File")
message = _(
Expand Down
5 changes: 4 additions & 1 deletion arches/app/etl_modules/branch_excel_importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,8 @@ def stage_excel_file(self, file, summary, cursor):
uploaded_file_path = os.path.join(
settings.UPLOADED_FILES_DIR, "tmp", self.loadid, file
)
workbook = load_workbook(filename=default_storage.open(uploaded_file_path))
opened_file = default_storage.open(uploaded_file_path)
workbook = load_workbook(filename=opened_file, read_only=True)
graphid = self.get_graphid(workbook)
nodegroup_lookup, nodes = self.get_graph_tree(graphid)
node_lookup = self.get_node_lookup(nodes)
Expand All @@ -272,6 +273,8 @@ def stage_excel_file(self, file, summary, cursor):
worksheet, cursor, node_lookup, nodegroup_lookup
)
summary["files"][file]["worksheets"].append(details)
opened_file.close()

cursor.execute(
"""UPDATE load_event SET load_details = %s WHERE loadid = %s""",
(json.dumps(summary), self.loadid),
Expand Down
5 changes: 4 additions & 1 deletion arches/app/etl_modules/tile_excel_importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,8 @@ def stage_excel_file(self, file, summary, cursor):
uploaded_file_path = os.path.join(
settings.UPLOADED_FILES_DIR, "tmp", self.loadid, file
)
workbook = load_workbook(filename=default_storage.open(uploaded_file_path))
opened_file = default_storage.open(uploaded_file_path)
workbook = load_workbook(filename=opened_file, read_only=True)
graphid = self.get_graphid(workbook)
nodegroup_lookup, nodes = self.get_graph_tree(graphid)
node_lookup = self.get_node_lookup(nodes)
Expand All @@ -296,6 +297,8 @@ def stage_excel_file(self, file, summary, cursor):
worksheet, cursor, node_lookup, nodegroup_lookup
)
summary["files"][file]["worksheets"].append(details)
opened_file.close()

cursor.execute(
"""UPDATE load_event SET load_details = %s WHERE loadid = %s""",
(json.dumps(summary), self.loadid),
Expand Down
2 changes: 1 addition & 1 deletion arches/app/utils/file_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def test_unknown_filetypes(self, file, extension=None):
errors.append(f"File type is not permitted: {extension}")
case "xlsx":
try:
load_workbook(io.BytesIO(file))
load_workbook(io.BytesIO(file), read_only=True)
except (InvalidFileException, zipfile.BadZipFile):
errors.append("Invalid xlsx workbook")
case "csv":
Expand Down
2 changes: 1 addition & 1 deletion docker/entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ init_arches_project() {
cd_web_root
[[ -d ${APP_FOLDER} ]] || mkdir ${APP_FOLDER}

arches-project create ${ARCHES_PROJECT} --directory ${ARCHES_PROJECT}
arches-admin startproject ${ARCHES_PROJECT} --directory ${ARCHES_PROJECT}

exit_code=$?
if [[ ${exit_code} != 0 ]]; then
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ dependencies = [
"edtf==4.0.1",
"elasticsearch>=8.3.1,<9.0.0",
"filetype==1.2.0",
"openpyxl==3.1.2",
"openpyxl==3.1.5",
"pillow>=7.0.0",
"polib==1.1.1",
"psycopg2==2.9.10",
Expand Down
2 changes: 1 addition & 1 deletion releases/7.6.0.md
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ System:
Python:
Upgraded:
Django == 4.2.16 (or <5.0.0)
openpyxl == 3.0.10
openpyxl == 3.1.2
filetype == 1.2.0
Added:
Expand Down
1 change: 1 addition & 0 deletions releases/8.0.0.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ Python:
Django: 5.1.0
django-cors-headers: 4.4.0
psycopg2: 2.9.10
openpyxl: 3.1.5
Added:
Expand Down
2 changes: 1 addition & 1 deletion tests/utils/test_file_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ def test_check_dsstore_strict(self):
self.assertEqual(errors, ["File type is not permitted: DS_Store"])

@patch("filetype.guess", Mock(return_value=None))
@patch("arches.app.utils.file_validator.load_workbook", lambda noop: None)
@patch("arches.app.utils.file_validator.load_workbook", lambda file, **kwargs: None)
def test_valid_xlsx(self):
errors = self.validator.validate_file_type(self.mock_file, extension="xlsx")
self.assertEqual(errors, [])
Expand Down

0 comments on commit e3aeed2

Please sign in to comment.