From 753f92154786b21015adeda55bcdcd378a5c8834 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Tue, 8 Oct 2024 08:51:50 -0400 Subject: [PATCH] Fix ResourceWarning from unclosed excel files --- arches/app/etl_modules/base_import_module.py | 7 ++++--- arches/app/etl_modules/branch_excel_importer.py | 5 ++++- arches/app/etl_modules/tile_excel_importer.py | 5 ++++- arches/app/utils/file_validator.py | 2 +- tests/utils/test_file_validator.py | 2 +- 5 files changed, 14 insertions(+), 7 deletions(-) diff --git a/arches/app/etl_modules/base_import_module.py b/arches/app/etl_modules/base_import_module.py index f86605902a6..a38732e3614 100644 --- a/arches/app/etl_modules/base_import_module.py +++ b/arches/app/etl_modules/base_import_module.py @@ -354,13 +354,14 @@ def read(self, request=None, source=None): if file.split(".")[-1] == "xlsx": try: uploaded_file_path = os.path.join(self.temp_dir, file) - workbook = load_workbook( - filename=default_storage.open(uploaded_file_path) - ) + opened_file = default_storage.open(uploaded_file_path) + workbook = load_workbook(filename=opened_file, read_only=True) self.validate_uploaded_file(workbook) has_valid_excel_file = True except: pass + else: + opened_file.close() if not has_valid_excel_file: title = _("Invalid Uploaded File") message = _( diff --git a/arches/app/etl_modules/branch_excel_importer.py b/arches/app/etl_modules/branch_excel_importer.py index c93dea4a8e8..ca811a22750 100644 --- a/arches/app/etl_modules/branch_excel_importer.py +++ b/arches/app/etl_modules/branch_excel_importer.py @@ -261,7 +261,8 @@ def stage_excel_file(self, file, summary, cursor): uploaded_file_path = os.path.join( settings.UPLOADED_FILES_DIR, "tmp", self.loadid, file ) - workbook = load_workbook(filename=default_storage.open(uploaded_file_path)) + opened_file = default_storage.open(uploaded_file_path) + workbook = load_workbook(filename=opened_file, read_only=True) graphid = self.get_graphid(workbook) nodegroup_lookup, nodes = self.get_graph_tree(graphid) node_lookup = self.get_node_lookup(nodes) @@ -272,6 +273,8 @@ def stage_excel_file(self, file, summary, cursor): worksheet, cursor, node_lookup, nodegroup_lookup ) summary["files"][file]["worksheets"].append(details) + opened_file.close() + cursor.execute( """UPDATE load_event SET load_details = %s WHERE loadid = %s""", (json.dumps(summary), self.loadid), diff --git a/arches/app/etl_modules/tile_excel_importer.py b/arches/app/etl_modules/tile_excel_importer.py index 9a42bcb24b8..1510fc52bba 100644 --- a/arches/app/etl_modules/tile_excel_importer.py +++ b/arches/app/etl_modules/tile_excel_importer.py @@ -261,7 +261,8 @@ def stage_excel_file(self, file, summary, cursor): uploaded_file_path = os.path.join( settings.UPLOADED_FILES_DIR, "tmp", self.loadid, file ) - workbook = load_workbook(filename=default_storage.open(uploaded_file_path)) + opened_file = default_storage.open(uploaded_file_path) + workbook = load_workbook(filename=opened_file, read_only=True) graphid = self.get_graphid(workbook) nodegroup_lookup, nodes = self.get_graph_tree(graphid) node_lookup = self.get_node_lookup(nodes) @@ -271,6 +272,8 @@ def stage_excel_file(self, file, summary, cursor): worksheet, cursor, node_lookup, nodegroup_lookup ) summary["files"][file]["worksheets"].append(details) + opened_file.close() + cursor.execute( """UPDATE load_event SET load_details = %s WHERE loadid = %s""", (json.dumps(summary), self.loadid), diff --git a/arches/app/utils/file_validator.py b/arches/app/utils/file_validator.py index 65db9c08057..296761d6114 100644 --- a/arches/app/utils/file_validator.py +++ b/arches/app/utils/file_validator.py @@ -31,7 +31,7 @@ def test_unknown_filetypes(self, file, extension=None): errors.append(f"File type is not permitted: {extension}") case "xlsx": try: - load_workbook(io.BytesIO(file)) + load_workbook(io.BytesIO(file), read_only=True) except (InvalidFileException, zipfile.BadZipFile): errors.append("Invalid xlsx workbook") case "csv": diff --git a/tests/utils/test_file_validator.py b/tests/utils/test_file_validator.py index 80aace0e7a3..fa99265a17a 100644 --- a/tests/utils/test_file_validator.py +++ b/tests/utils/test_file_validator.py @@ -116,7 +116,7 @@ def test_check_dsstore_strict(self): self.assertEqual(errors, ["File type is not permitted: DS_Store"]) @patch("filetype.guess", Mock(return_value=None)) - @patch("arches.app.utils.file_validator.load_workbook", lambda noop: None) + @patch("arches.app.utils.file_validator.load_workbook", lambda file, **kwargs: None) def test_valid_xlsx(self): errors = self.validator.validate_file_type(self.mock_file, extension="xlsx") self.assertEqual(errors, [])