From 89dc9b1c02ab057f54471b478a4527da8b875003 Mon Sep 17 00:00:00 2001
From: njkim
Date: Mon, 3 Jul 2023 15:59:33 -0700
Subject: [PATCH 1/7] Update trigger function and add index, #9671

---
Reviewer notes (this section sits between the "---" marker and the diff, so
it is not part of the commit message or of the applied change):
 * The forward SQL redefines __arches_complete_bulk_load() so that the
   cardinality check stops at the first violating group (LIMIT 1); the
   procedure only tests whether the count is greater than zero. The reverse
   SQL restores the definition without the LIMIT.
 * CREATE INDEX now names the indexed column. PostgreSQL requires a column
   (or expression) list, so "CREATE INDEX ... ON load_staging;" is a syntax
   error and the forward migration could not run. The column "tileid" is
   inferred from the index name load_staging_tileid -- confirm against the
   load_staging schema.
 * The one-line-per-diff-line layout of this series was restored; hunk line
   counts are unchanged. Indentation inside the hunks was reconstructed and
   should be checked against the target files.

 .../9670_improve_bulk_load_performance.py     | 96 +++++++++++++++++++
 1 file changed, 96 insertions(+)
 create mode 100644 arches/app/models/migrations/9670_improve_bulk_load_performance.py

diff --git a/arches/app/models/migrations/9670_improve_bulk_load_performance.py b/arches/app/models/migrations/9670_improve_bulk_load_performance.py
new file mode 100644
index 00000000000..6139258088b
--- /dev/null
+++ b/arches/app/models/migrations/9670_improve_bulk_load_performance.py
@@ -0,0 +1,96 @@
+from django.db import migrations
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ("models", "8010_export_permissions"),
+    ]
+
+    update_check_excess_tiles_trigger = """
+        create or replace procedure __arches_complete_bulk_load() AS
+        $$
+            DECLARE
+                cardinality_violations bigint;
+            BEGIN
+                alter table tiles enable trigger __arches_check_excess_tiles_trigger;
+                alter table tiles enable trigger __arches_trg_update_spatial_attributes;
+
+                if (not __arches_refresh_spatial_views()) then
+                    Raise EXCEPTION 'Unable to refresh spatial views';
+                end if;
+
+                with cardinality_violations as (SELECT t.resourceinstanceid,
+                                                       t.nodegroupid,
+                                                       COALESCE(t.parenttileid::text, '') parent_tileid,
+                                                       count(*)
+                                                FROM tiles t,
+                                                     node_groups ng
+                                                WHERE t.nodegroupid = ng.nodegroupid
+                                                  AND ng.cardinality = '1'
+                                                group by t.resourceinstanceid, t.nodegroupid, parent_tileid
+                                                having count(*) > 1
+                                                LIMIT 1)
+                select count(*)
+                into cardinality_violations
+                from cardinality_violations;
+
+                if (cardinality_violations > 0) then
+                    Raise Exception 'Cardinality violations found. Run `%` to list violations',
+                        'select * from __arches_get_tile_cardinality_violations()';
+                else
+                    Raise Notice 'No cardinality violations found';
+                end if;
+            END
+        $$
+        language plpgsql;
+    """
+
+    restore_check_excess_tiles_trigger = """
+        create or replace procedure __arches_complete_bulk_load() as
+        $$
+            DECLARE
+                cardinality_violations bigint;
+            BEGIN
+                alter table tiles enable trigger __arches_check_excess_tiles_trigger;
+                alter table tiles enable trigger __arches_trg_update_spatial_attributes;
+
+                if (not __arches_refresh_spatial_views()) then
+                    Raise EXCEPTION 'Unable to refresh spatial views';
+                end if;
+
+                with cardinality_violations as (SELECT t.resourceinstanceid,
+                                                       t.nodegroupid,
+                                                       COALESCE(t.parenttileid::text, '') parent_tileid,
+                                                       count(*)
+                                                FROM tiles t,
+                                                     node_groups ng
+                                                WHERE t.nodegroupid = ng.nodegroupid
+                                                  AND ng.cardinality = '1'
+                                                group by t.resourceinstanceid, t.nodegroupid, parent_tileid
+                                                having count(*) > 1)
+                select count(*)
+                into cardinality_violations
+                from cardinality_violations;
+
+                if (cardinality_violations > 0) then
+                    Raise Exception 'Cardinality violations found. Run `%` to list violations',
+                        'select * from __arches_get_tile_cardinality_violations()';
+                else
+                    Raise Notice 'No cardinality violations found';
+                end if;
+            END $$
+        language plpgsql;
+    """
+
+    create_tileid_index_on_load_staging = """
+        CREATE INDEX IF NOT EXISTS load_staging_tileid ON load_staging (tileid);
+    """
+
+    drop_tileid_index_on_load_staging = """
+        DROP INDEX IF EXISTS load_staging_tileid;
+    """
+
+    operations = [
+        migrations.RunSQL(update_check_excess_tiles_trigger, restore_check_excess_tiles_trigger),
+        migrations.RunSQL(create_tileid_index_on_load_staging, drop_tileid_index_on_load_staging),
+    ]

From ceaea6c23cc21ce20028297561db31f5675c26e0 Mon Sep 17 00:00:00 2001
From: njkim
Date: Mon, 3 Jul 2023 16:02:40 -0700
Subject: [PATCH 2/7] Skip check cardinality for editor, #9669

---
 arches/app/etl_modules/base_data_editor.py   |  2 +-
 arches/app/etl_modules/base_import_module.py | 15 +++++++++++----
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/arches/app/etl_modules/base_data_editor.py b/arches/app/etl_modules/base_data_editor.py
index 7aa8b118575..8389bd68436 100644
--- a/arches/app/etl_modules/base_data_editor.py
+++ b/arches/app/etl_modules/base_data_editor.py
@@ -367,7 +367,7 @@ def run_load_task(self, loadid, graph_id, node_id, operation, language_code, old
             return {"success": False, "data": {"title": _("Error"), "message": data_staged["message"]}}
 
         if data_updated["success"]:
-            data_updated = self.save_to_tiles(loadid)
+            data_updated = self.save_to_tiles(loadid, finalize_import=False)
             return {"success": True, "data": "done"}
         else:
             with connection.cursor() as cursor:
diff --git a/arches/app/etl_modules/base_import_module.py b/arches/app/etl_modules/base_import_module.py
index a95bed8d76c..b80ebd75a24 100644
--- a/arches/app/etl_modules/base_import_module.py
+++ b/arches/app/etl_modules/base_import_module.py
@@ -46,13 +46,13 @@ def reverse(self, request, **kwargs):
             logger.warn(response)
         return response
 
-    def save_to_tiles(self, loadid, multiprocessing=True):
+    def save_to_tiles(self, loadid, finalize_import=True, multiprocessing=True):
         self.loadid = loadid
         with connection.cursor() as cursor:
             try:
                 cursor.execute("""CALL __arches_prepare_bulk_load();""")
                 cursor.execute("""SELECT * FROM __arches_staging_to_tile(%s)""", [self.loadid])
-                row = cursor.fetchall()
+                saved = cursor.fetchone()[0]
             except (IntegrityError, ProgrammingError) as e:
                 logger.error(e)
                 cursor.execute(
@@ -66,9 +66,16 @@ def save_to_tiles(self, loadid, multiprocessing=True):
                     "message": _("Unable to insert record into staging table"),
                 }
             finally:
-                cursor.execute("""CALL __arches_complete_bulk_load();""")
+                cursor.execute("""alter table tiles enable trigger __arches_check_excess_tiles_trigger;""")
+                cursor.execute("""alter table tiles enable trigger __arches_trg_update_spatial_attributes;""")
 
-            if row[0][0]:
+                if finalize_import:
+                    cursor.execute("""SELECT __arches_refresh_spatial_views();""")
+                    refresh_successful = cursor.fetchone()[0]
+                    if not refresh_successful:
+                        raise Exception('Unable to refresh spatial views')
+
+            if saved:
                 cursor.execute(
                     """UPDATE load_event SET (status, load_end_time) = (%s, %s) WHERE loadid = %s""",
                     ("completed", datetime.now(), loadid),

From e387d6834a3ba5b989e32633f26c68753ecb1e84 Mon Sep 17 00:00:00 2001
From: njkim
Date: Mon, 3 Jul 2023 16:03:52 -0700
Subject: [PATCH 3/7] Try catching error after load before index, #9671

---
 arches/app/etl_modules/base_import_module.py  | 41 +++++++++++++------
 arches/app/templates/javascript.htm           |  1 +
 .../views/components/plugins/etl-manager.htm  |  6 +++
 3 files changed, 35 insertions(+), 13 deletions(-)

diff --git a/arches/app/etl_modules/base_import_module.py b/arches/app/etl_modules/base_import_module.py
index b80ebd75a24..95e06441068 100644
--- a/arches/app/etl_modules/base_import_module.py
+++ b/arches/app/etl_modules/base_import_module.py
@@ -66,26 +66,41 @@ def save_to_tiles(self, loadid, finalize_import=True, multiprocessing=True):
                     "message": _("Unable to insert record into staging table"),
                 }
             finally:
-                cursor.execute("""alter table tiles enable trigger __arches_check_excess_tiles_trigger;""")
-                cursor.execute("""alter table tiles enable trigger __arches_trg_update_spatial_attributes;""")
+                try:
+                    cursor.execute("""alter table tiles enable trigger __arches_check_excess_tiles_trigger;""")
+                    cursor.execute("""alter table tiles enable trigger __arches_trg_update_spatial_attributes;""")
 
-                if finalize_import:
-                    cursor.execute("""SELECT __arches_refresh_spatial_views();""")
-                    refresh_successful = cursor.fetchone()[0]
-                    if not refresh_successful:
-                        raise Exception('Unable to refresh spatial views')
+                    if finalize_import:
+                        cursor.execute("""SELECT __arches_refresh_spatial_views();""")
+                        refresh_successful = cursor.fetchone()[0]
+                        if not refresh_successful:
+                            raise Exception('Unable to refresh spatial views')
+                except Exception as e:
+                    logger.exception(e)
+                    cursor.execute(
+                        """UPDATE load_event SET (status, indexed_time, complete, successful) = (%s, %s, %s, %s) WHERE loadid = %s""",
+                        ("unindexed", datetime.now(), True, True, loadid),
+                    )
 
             if saved:
                 cursor.execute(
                     """UPDATE load_event SET (status, load_end_time) = (%s, %s) WHERE loadid = %s""",
                     ("completed", datetime.now(), loadid),
                 )
-                index_resources_by_transaction(loadid, quiet=True, use_multiprocessing=False, recalculate_descriptors=True)
-                cursor.execute(
-                    """UPDATE load_event SET (status, indexed_time, complete, successful) = (%s, %s, %s, %s) WHERE loadid = %s""",
-                    ("indexed", datetime.now(), True, True, loadid),
-                )
-                return {"success": True, "data": "success"}
+                try:
+                    index_resources_by_transaction(loadid, quiet=True, use_multiprocessing=False, recalculate_descriptors=True)
+                    cursor.execute(
+                        """UPDATE load_event SET (status, indexed_time, complete, successful) = (%s, %s, %s, %s) WHERE loadid = %s""",
+                        ("indexed", datetime.now(), True, True, loadid),
+                    )
+                    return {"success": True, "data": "indexed"}
+                except Exception as e:
+                    logger.exception(e)
+                    cursor.execute(
+                        """UPDATE load_event SET (status, load_end_time) = (%s, %s) WHERE loadid = %s""",
+                        ("unindexed", datetime.now(), loadid),
+                    )
+                    return {"success": False, "data": "saved"}
             else:
                 cursor.execute(
                     """UPDATE load_event SET status = %s, load_end_time = %s WHERE loadid = %s""",
diff --git a/arches/app/templates/javascript.htm b/arches/app/templates/javascript.htm
index 6e223c6adf9..079ef31755e 100644
--- a/arches/app/templates/javascript.htm
+++ b/arches/app/templates/javascript.htm
@@ -735,6 +735,7 @@
             remove-from-history='{% trans "remove from history" as removeFromHistory %} "{{ removeFromHistory|escapejs }}"'
             number-of-resources-updated='{% trans "Number of Resources Updated" as numberOfResourcesUpdated %} "{{ numberOfResourcesUpdated|escapejs }}"'
             indexing='{% trans "indexing" as indexing %} "{{ indexing|escapejs }}"'
+            loaded-but-unindexed='{% trans "loaded but unindexed" as loadedButUnindexed %} "{{ loadedButUnindexed|escapejs }}"'
             validating='{% trans "validating" as validating %} "{{ validating|escapejs }}"'
             completed='{% trans "completed" as completed %} "{{ completed|escapejs }}"'
             failed='{% trans "failed" as failed %} "{{ failed|escapejs }}"'
diff --git a/arches/app/templates/views/components/plugins/etl-manager.htm b/arches/app/templates/views/components/plugins/etl-manager.htm
index 3bafd6f0a8a..17e25b09e06 100644
--- a/arches/app/templates/views/components/plugins/etl-manager.htm
+++ b/arches/app/templates/views/components/plugins/etl-manager.htm
@@ -143,6 +143,12 @@

+ + +