Skip to content

Commit

Permalink
Merge pull request #9843 from archesproject/dev/7.5.x
Browse files Browse the repository at this point in the history
Update master with latest in 7.5.x
  • Loading branch information
chiatt authored Jul 27, 2023
2 parents 03b0b1d + afe0941 commit 6346ab3
Show file tree
Hide file tree
Showing 29 changed files with 2,193 additions and 1,564 deletions.
2 changes: 1 addition & 1 deletion arches/app/datatypes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ def get_tile_data(self, tile):
except:
data = tile["data"]
provisionaledits = tile["provisionaledits"]
if data is not None and len(list(data.keys())) > 0:
if data is not None and any(data.values()):
return data
elif provisionaledits is not None and len(list(provisionaledits.keys())) > 0:
if len(list(provisionaledits.keys())) > 1:
Expand Down
2 changes: 1 addition & 1 deletion arches/app/datatypes/datatypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -1535,7 +1535,7 @@ def append_to_document(self, document, nodevalue, nodeid, tile, provisional=Fals
for f in tile.data[str(nodeid)]:
val = {"string": f["name"], "nodegroup_id": tile.nodegroup_id, "provisional": provisional}
document["strings"].append(val)
except KeyError as e:
except (KeyError, TypeError) as e:
for k, pe in tile.provisionaledits.items():
for f in pe["value"][nodeid]:
val = {"string": f["name"], "nodegroup_id": tile.nodegroup_id, "provisional": provisional}
Expand Down
7 changes: 5 additions & 2 deletions arches/app/datatypes/url.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,9 +100,12 @@ def transform_value_for_tile(self, value, **kwargs):
try:
return ast.literal_eval(value)
except:
# this will probably fail validation, but that is ok. We need the error to report the value.
return value
if isinstance(value, dict):
return value
else:
return {"url": value, "url_label": ""}
except BaseException:
# this will probably fail validation, but that is ok. We need the error to report the value.
if isinstance(value, dict):
return value
else:
Expand Down
39 changes: 16 additions & 23 deletions arches/app/etl_modules/base_data_editor.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,17 @@
from datetime import datetime
import json
import logging
import requests
from urllib.parse import urlparse, urlunparse
from urllib.parse import urlsplit, parse_qs
import uuid
from django.db import connection
from django.db.models.functions import Lower
from django.http import HttpRequest
from django.utils.translation import ugettext as _
from arches.app.datatypes.datatypes import DataTypeFactory
from arches.app.etl_modules.base_import_module import BaseImportModule
from arches.app.models.models import GraphModel, Node
from arches.app.models.system_settings import settings
import arches.app.tasks as tasks
from arches.app.utils.index_database import index_resources_by_transaction
from arches.app.views.search import search_results

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -101,11 +100,16 @@ def log_event(self, cursor, status):
)

def get_resourceids_from_search_url(self, search_url):
    """Run the search represented by *search_url* and return matching resource ids.

    Builds an internal GET request that reuses the caller's user (so
    permission filtering matches what the user saw in the UI), copies the
    query string from the supplied search URL, forces export mode, and
    extracts the resourceinstanceid of every hit from the search response.

    :param search_url: a search-page URL whose query string defines the filter
    :return: list of resourceinstanceid strings
    """
    request = HttpRequest()
    # Reuse the requesting user so permission filtering matches the UI search.
    request.user = self.request.user
    request.method = "GET"
    request.GET["export"] = True
    params = parse_qs(urlsplit(search_url).query)
    for key, values in params.items():
        # parse_qs yields a list per key; only the first value is meaningful here.
        request.GET[key] = values[0]
    response = search_results(request)
    hits = json.loads(response.content)["results"]["hits"]["hits"]
    return [hit["_source"]["resourceinstanceid"] for hit in hits]

def validate(self, request):
    """No-op validation hook: always reports success with empty data."""
    result = {"success": True, "data": {}}
    return result
Expand Down Expand Up @@ -168,26 +172,15 @@ def get_preview_data(self, graph_id, node_id, resourceids, language_code, old_te
+ text_query
)

tile_sub_query = (
resource_count_query = (
"""
AND resourceinstanceid IN (SELECT DISTINCT t.resourceinstanceid FROM tiles t, nodes n
SELECT count(DISTINCT t.resourceinstanceid) FROM tiles t, nodes n
WHERE t.nodegroupid = n.nodegroupid
"""
+ node_id_query
+ graph_id_query
+ resourceids_query
+ text_query
+ ")"
)

resource_count_query = (
"""
SELECT count(n.resourceinstanceid) FROM resource_instances n
WHERE 0 = 0
"""
+ graph_id_query
+ resourceids_query
+ tile_sub_query
)

with connection.cursor() as cursor:
Expand Down Expand Up @@ -367,7 +360,7 @@ def run_load_task(self, loadid, graph_id, node_id, operation, language_code, old
return {"success": False, "data": {"title": _("Error"), "message": data_staged["message"]}}

if data_updated["success"]:
data_updated = self.save_to_tiles(loadid)
data_updated = self.save_to_tiles(loadid, finalize_import=False)
return {"success": True, "data": "done"}
else:
with connection.cursor() as cursor:
Expand Down
41 changes: 31 additions & 10 deletions arches/app/etl_modules/base_import_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,13 @@ def reverse(self, request, **kwargs):
logger.warn(response)
return response

def save_to_tiles(self, loadid, multiprocessing=True):
def save_to_tiles(self, loadid, finalize_import=True, multiprocessing=True):
self.loadid = loadid
with connection.cursor() as cursor:
try:
cursor.execute("""CALL __arches_prepare_bulk_load();""")
cursor.execute("""SELECT * FROM __arches_staging_to_tile(%s)""", [self.loadid])
row = cursor.fetchall()
saved = cursor.fetchone()[0]
except (IntegrityError, ProgrammingError) as e:
logger.error(e)
cursor.execute(
Expand All @@ -66,19 +66,40 @@ def save_to_tiles(self, loadid, multiprocessing=True):
"message": _("Unable to insert record into staging table"),
}
finally:
cursor.execute("""CALL __arches_complete_bulk_load();""")
try:
cursor.execute("""CALL __arches_complete_bulk_load();""")

if row[0][0]:
if finalize_import:
cursor.execute("""SELECT __arches_refresh_spatial_views();""")
refresh_successful = cursor.fetchone()[0]
if not refresh_successful:
raise Exception('Unable to refresh spatial views')
except Exception as e:
logger.exception(e)
cursor.execute(
"""UPDATE load_event SET (status, indexed_time, complete, successful) = (%s, %s, %s, %s) WHERE loadid = %s""",
("unindexed", datetime.now(), True, True, loadid),
)

if saved:
cursor.execute(
"""UPDATE load_event SET (status, load_end_time) = (%s, %s) WHERE loadid = %s""",
("completed", datetime.now(), loadid),
)
index_resources_by_transaction(loadid, quiet=True, use_multiprocessing=False, recalculate_descriptors=True)
cursor.execute(
"""UPDATE load_event SET (status, indexed_time, complete, successful) = (%s, %s, %s, %s) WHERE loadid = %s""",
("indexed", datetime.now(), True, True, loadid),
)
return {"success": True, "data": "success"}
try:
index_resources_by_transaction(loadid, quiet=True, use_multiprocessing=False, recalculate_descriptors=True)
cursor.execute(
"""UPDATE load_event SET (status, indexed_time, complete, successful) = (%s, %s, %s, %s) WHERE loadid = %s""",
("indexed", datetime.now(), True, True, loadid),
)
return {"success": True, "data": "indexed"}
except Exception as e:
logger.exception(e)
cursor.execute(
"""UPDATE load_event SET (status, load_end_time) = (%s, %s) WHERE loadid = %s""",
("unindexed", datetime.now(), loadid),
)
return {"success": False, "data": "saved"}
else:
cursor.execute(
"""UPDATE load_event SET status = %s, load_end_time = %s WHERE loadid = %s""",
Expand Down
7 changes: 6 additions & 1 deletion arches/app/etl_modules/branch_csv_importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ def process_worksheet(self, worksheet, cursor, node_lookup, nodegroup_lookup):
row_count = 0
for row in worksheet.rows:
cell_values = [cell.value for cell in row]
if len(cell_values) == 0:
if len(cell_values) == 0 or any(cell_values) is False:
continue
resourceid = cell_values[0]
if resourceid is None:
Expand Down Expand Up @@ -268,6 +268,11 @@ def read(self, request):
result["summary"]["files"][file.filename] = {"size": (self.filesize_format(file.file_size))}
result["summary"]["cumulative_excel_files_size"] = self.cumulative_excel_files_size
default_storage.save(os.path.join(self.temp_dir, file.filename), File(zip_ref.open(file)))
elif content.name.split(".")[-1] == "xlsx":
self.cumulative_excel_files_size += content.size
result["summary"]["files"][content.name] = {"size": (self.filesize_format(content.size))}
result["summary"]["cumulative_excel_files_size"] = self.cumulative_excel_files_size
default_storage.save(os.path.join(self.temp_dir, content.name), File(content))
return {"success": result, "data": result}

def start(self, request):
Expand Down
3 changes: 2 additions & 1 deletion arches/app/media/css/arches.scss
Original file line number Diff line number Diff line change
Expand Up @@ -12544,7 +12544,8 @@ ul.select2-choices:after {
}

.iiif-widget-report {
margin: 0 0 0 320px;
width: 425px;
margin: 0 20px 10px 0px;
}

.iiif-widget-report .iiif-leaflet {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -191,9 +191,8 @@ define([
}

self.addAllFormData();
self.loading(true);
params.activeTab("import");
self.submit('write').then(data => {
params.activeTab("import");
console.log(data.result);
}).fail( function(err) {
self.alert(
Expand Down
12 changes: 12 additions & 0 deletions arches/app/models/fields/i18n.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,18 @@ def _parse(self, value, lang, use_nulls):
if isinstance(value, str) and value != "null":
try:
ret = json.loads(value)

# the following is a fix for issue #9623 - using double quotation marks in i18n input
# re https://github.com/archesproject/arches/issues/9623
# the reason we have to do this next check is that we assumed that if the
# json.loads method doesn't fail we have a python dict. That's usually
# true unless you have a simple string wrapped in quotes
# eg: '"hello world"' rather than simply 'hello world'
# the quoted string loads without error but is not a dict
# hence the need for this check
if not isinstance(ret, dict):
ret = {}
raise Exception("value is not a json object")
except:
ret[lang] = value
self.value_is_primitive = True
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
from django.db import migrations, models


class Migration(migrations.Migration):
    """Backfill explicit node keys on provisional-only tiles.

    Tiles saved with only provisional edits carry an empty ``data`` dict;
    downstream code expects every node of the tile's nodegroup to appear as a
    key (mapped to None).  Forward fills those keys in; reverse strips tiles
    whose data holds nothing but None values back to an empty dict.
    """

    dependencies = [
        ("models", "9477_fix_for_spatial_view_dbf_function_edtf_displaying_null"),
    ]

    def forwards_func(apps, schema_editor):
        # Historical models keep the migration decoupled from current code.
        TileModel = apps.get_model("models", "TileModel")
        Node = apps.get_model("models", "Node")

        # Only tiles whose data is empty but which carry provisional edits.
        for tile in TileModel.objects.filter(data={}, provisionaledits__isnull=False):
            changed = False
            for node in Node.objects.filter(nodegroup_id=tile.nodegroup_id):
                if str(node.pk) not in tile.data:
                    tile.data[str(node.pk)] = None
                    changed = True
            # Save once per tile rather than once per node: identical end
            # state, but avoids a redundant DB write for every node.
            if changed:
                tile.save()

    def reverse_func(apps, schema_editor):
        TileModel = apps.get_model("models", "TileModel")

        # Undo the backfill: a data dict with no truthy values on a tile that
        # has provisional edits is treated as the backfilled state.
        for tile in TileModel.objects.filter(provisionaledits__isnull=False):
            if bool(tile.provisionaledits and not any(tile.data.values())):
                tile.data = {}
                tile.save()

    operations = [
        migrations.RunPython(forwards_func, reverse_func),
    ]
70 changes: 70 additions & 0 deletions arches/app/models/migrations/9670_improve_bulk_load_performance.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
from django.db import migrations

class Migration(migrations.Migration):
    """Improve bulk-load performance (issue #9670).

    Forward: redefines ``__arches_complete_bulk_load()`` so it only re-enables
    the tile triggers, dropping the inline spatial-view refresh and
    cardinality check the previous version performed (those steps are now the
    caller's responsibility), and adds an index on ``load_staging.tileid``.
    Reverse: restores the original procedure body and drops the index.
    """

    dependencies = [
        ("models", "9648_add_empty_key_value_pairs_to_tiles"),
    ]

    # Forward SQL: slimmed-down procedure -- re-enable triggers only.
    # NOTE(review): cardinality_violations is declared but unused in this
    # version; harmless leftover from the original body.
    update_check_excess_tiles_trigger = """
        create or replace procedure __arches_complete_bulk_load() AS
        $$
        DECLARE
            cardinality_violations bigint;
        BEGIN
            alter table tiles enable trigger __arches_check_excess_tiles_trigger;
            alter table tiles enable trigger __arches_trg_update_spatial_attributes;
        END
        $$
        language plpgsql;
    """

    # Reverse SQL: original procedure body, which additionally refreshes the
    # spatial views and raises if any cardinality-1 nodegroup has multiple
    # tiles for the same resource/parent.
    restore_check_excess_tiles_trigger = """
        create or replace procedure __arches_complete_bulk_load() as
        $$
        DECLARE
            cardinality_violations bigint;
        BEGIN
            alter table tiles enable trigger __arches_check_excess_tiles_trigger;
            alter table tiles enable trigger __arches_trg_update_spatial_attributes;
            if (not __arches_refresh_spatial_views()) then
                Raise EXCEPTION 'Unable to refresh spatial views';
            end if;
            with cardinality_violations as (SELECT t.resourceinstanceid,
                    t.nodegroupid,
                    COALESCE(t.parenttileid::text, '') parent_tileid,
                    count(*)
                FROM tiles t,
                    node_groups ng
                WHERE t.nodegroupid = ng.nodegroupid
                    AND ng.cardinality = '1'
                group by t.resourceinstanceid, t.nodegroupid, parent_tileid
                having count(*) > 1)
            select count(*)
            into cardinality_violations
            from cardinality_violations;
            if (cardinality_violations > 0) then
                Raise Exception 'Cardinality violations found. Run `%` to list violations',
                    'select * from __arches_get_tile_cardinality_violations()';
            else
                Raise Notice 'No cardinality violations found';
            end if;
        END $$
        language plpgsql;
    """

    # Index speeds tileid lookups against load_staging during the
    # staging-to-tile step of a bulk load.
    create_index_on_load_staging_tileid = """
        CREATE INDEX IF NOT EXISTS load_staging_tileid ON load_staging (tileid);
    """

    drop_index_on_load_staging_tileid = """
        DROP INDEX IF EXISTS load_staging_tileid;
    """

    operations = [
        migrations.RunSQL(update_check_excess_tiles_trigger, restore_check_excess_tiles_trigger),
        migrations.RunSQL(create_index_on_load_staging_tileid, drop_index_on_load_staging_tileid),
    ]
Loading

0 comments on commit 6346ab3

Please sign in to comment.