From 1af1b0500f904b28fc1741bb6bd478b6e75d3772 Mon Sep 17 00:00:00 2001 From: NextGenEng <58440325+THOR300@users.noreply.github.com> Date: Thu, 13 Apr 2023 09:40:25 +0100 Subject: [PATCH] Bugfix/faciliate redownload on source url change (#39) Should a source url change, we want to facilitate redownload from the source url. This bugfix enables that. This PR looks like a lot of file changes but there are really only two modules that have simple changes (the parse function itself and the test for the function); the rest of the changes are to the expected test data for the integration tests. REVIEWERS CONFIRM THE BELOW Storing documents in the cdn: Should a source url change for the document, we would treat it as a new document. The new cdn key / path would be generated, and the document uploaded to the cdn. The cdn key / path is a function of the doc title and md5sum. Thus, we should only ever have the exact same cdn path if the content of the document is exactly the same. Thus we would silently overwrite the pdf document stored in the cdn. 
--------- Co-authored-by: Mark --- HOW_TO_UPDATE_TESTS.md | 22 +++- .../input/new_and_updated_documents.json | 96 ++++++++++++++++++ .../2023-03-29-17-29-45..json | 1 - .../2023-04-12-13-01-01..json | 32 ++++++ .../2023-03-29-17-29-47..json | 1 - .../2023-04-12-13-01-06..json | 1 + ...9-17-29-45.npy => 2023-04-12-13-01-05.npy} | Bin ...9-17-29-45.npy => 2023-04-12-13-01-05.npy} | Bin .../2023-03-29-17-29-45..json | 1 - .../2023-04-12-13-01-01..json | 32 ++++++ ...17-29-45..npy => 2023-04-12-13-01-01..npy} | Bin .../2023-03-29-17-29-47..json | 1 - ...9-17-29-45.npy => 2023-04-12-13-01-05.npy} | Bin .../2023-04-12-13-01-06..json | 1 + .../2023-04-12-13-01-01..json | 11 ++ .../2023-04-12-13-01-06..json | 1 + .../TESTCCLW.executive.3.3.json | 57 ++++++++++- .../TESTCCLW.executive.4.4.json | 57 ++++++++++- .../input/new_and_updated_documents.json | 96 ++++++++++++++++++ .../new_and_updated_documents.json_errors | 34 ++++--- .../base/updated_document_actions.py | 33 +----- .../tests/test_update_actions.py | 10 +- 22 files changed, 421 insertions(+), 66 deletions(-) delete mode 100644 integration_tests/data/pipeline_out/archive/ingest_unit_test_embeddings_input/TESTCCLW.executive.3.3/2023-03-29-17-29-45..json create mode 100644 integration_tests/data/pipeline_out/archive/ingest_unit_test_embeddings_input/TESTCCLW.executive.3.3/2023-04-12-13-01-01..json delete mode 100644 integration_tests/data/pipeline_out/archive/ingest_unit_test_embeddings_input/TESTCCLW.executive.4.4/2023-03-29-17-29-47..json create mode 100644 integration_tests/data/pipeline_out/archive/ingest_unit_test_embeddings_input/TESTCCLW.executive.4.4/2023-04-12-13-01-06..json rename integration_tests/data/pipeline_out/archive/ingest_unit_test_indexer_input/TESTCCLW.executive.1.1/{2023-03-29-17-29-45.npy => 2023-04-12-13-01-05.npy} (100%) rename integration_tests/data/pipeline_out/archive/ingest_unit_test_indexer_input/TESTCCLW.executive.2.2/{2023-03-29-17-29-45.npy => 2023-04-12-13-01-05.npy} (100%) delete 
mode 100644 integration_tests/data/pipeline_out/archive/ingest_unit_test_indexer_input/TESTCCLW.executive.3.3/2023-03-29-17-29-45..json create mode 100644 integration_tests/data/pipeline_out/archive/ingest_unit_test_indexer_input/TESTCCLW.executive.3.3/2023-04-12-13-01-01..json rename integration_tests/data/pipeline_out/archive/ingest_unit_test_indexer_input/TESTCCLW.executive.3.3/{2023-03-29-17-29-45..npy => 2023-04-12-13-01-01..npy} (100%) delete mode 100644 integration_tests/data/pipeline_out/archive/ingest_unit_test_indexer_input/TESTCCLW.executive.4.4/2023-03-29-17-29-47..json rename integration_tests/data/pipeline_out/archive/ingest_unit_test_indexer_input/TESTCCLW.executive.4.4/{2023-03-29-17-29-45.npy => 2023-04-12-13-01-05.npy} (100%) create mode 100644 integration_tests/data/pipeline_out/archive/ingest_unit_test_indexer_input/TESTCCLW.executive.4.4/2023-04-12-13-01-06..json create mode 100644 integration_tests/data/pipeline_out/archive/ingest_unit_test_parser_input/TESTCCLW.executive.3.3/2023-04-12-13-01-01..json create mode 100644 integration_tests/data/pipeline_out/archive/ingest_unit_test_parser_input/TESTCCLW.executive.4.4/2023-04-12-13-01-06..json diff --git a/HOW_TO_UPDATE_TESTS.md b/HOW_TO_UPDATE_TESTS.md index b15bbbc7..a2bc06a1 100644 --- a/HOW_TO_UPDATE_TESTS.md +++ b/HOW_TO_UPDATE_TESTS.md @@ -22,20 +22,34 @@ Build the docker image locally make build_test + +MAKE SURE YOU HAVE THE CORRECT AWS CREDENTIALS SET UP. + + export AWS_PROFILE=${PROFILE_NAME} + Set up the test buckets - python setup_test_buckets ${document_bucket} ${pipeline_bucket} ${region} + python -m integration_tests.setup_test_buckets ${document_bucket} ${pipeline_bucket} ${region} Sync the test data to the s3 bucket aws s3 sync integration_tests/data/pipeline_in s3://${pipeline_bucket} -Run the docker image +Run the docker image. If you are trying to figure out what the variables are look in the env var section of the following file: .github/workflows/integration-tests.yml. 
Also note that the prefixes used must match the subdirectory names of the data/pipeline_in directory. docker run -e AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID} -e AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY} -e API_HOST="" -e MACHINE_USER_EMAIL="" -e MACHINE_USER_PASSWORD="" navigator-data-ingest-test --pipeline-bucket ${PIPELINE_BUCKET} --document-bucket ${DOCUMENT_BUCKET} --input-file ${TEST_DATA_UPLOAD_PATH} --output-prefix ${OUTPUT_PREFIX} --embeddings-input-prefix ${EMBEDDINGS_INPUT_PREFIX} --indexer-input-prefix ${INDEXER_INPUT_PREFIX} -Assert that the output is correct and if so snyc the data locally to the pipeline_out directory +Example: + + docker run -e AWS_ACCESS_KEY_ID=XXXX -e AWS_SECRET_ACCESS_KEY=XXXX -e API_HOST="" -e MACHINE_USER_EMAIL="" -e MACHINE_USER_PASSWORD="" navigator-data-ingest-test --pipeline-bucket pipbucket123123123 --document-bucket docbucket123123123 --input-file input/new_and_updated_documents.json --output-prefix ingest_unit_test_parser_input --embeddings-input-prefix ingest_unit_test_embeddings_input --indexer-input-prefix ingest_unit_test_indexer_input + + +Assert that the output is correct and if so manually delete all the files in the pipeline_out directory and sync the data locally to the pipeline_out directory cd integration_tests/data/pipeline_out - aws s3 sync s3://${pipeline_bucket}/ . \ No newline at end of file + aws s3 sync s3://${pipeline_bucket}/ . 
+ +Remove the test buckets + + python -m integration_tests.remove_test_buckets ${document_bucket} ${pipeline_bucket} ${region} \ No newline at end of file diff --git a/integration_tests/data/pipeline_in/input/new_and_updated_documents.json b/integration_tests/data/pipeline_in/input/new_and_updated_documents.json index 59670d13..c2f28066 100644 --- a/integration_tests/data/pipeline_in/input/new_and_updated_documents.json +++ b/integration_tests/data/pipeline_in/input/new_and_updated_documents.json @@ -838,6 +838,102 @@ } ], "slug": "european-union_2013_decision-no-13862013eu-of-the-european-parliament-and-of-the-council-of-20-november-2013-on-a-general-union-environment-action-programme-to-2020-living-well-within-the-limits-of-our-planet_8570_3017" + }, + { + "publication_ts": "2013-01-01T00:00:00", + "name": "DECISION No 1386/2013/EU OF THE EUROPEAN PARLIAMENT AND OF THE COUNCIL of 20 November 2013 on a General Union Environment Action Programme to 2020 \u2018Living well, within the limits of our planet\u2019", + "description": "The Decision no 1386/2013/EU sets up the General Union Environment Action Programme to 2020 \u2018Living well, within the limits of our planet'. It adopts the '7th Environment Action programme' or \u20187th EAP'. 
The priority objectives of the 7th EAP are: (a) to protect, conserve and enhance the Union's natural capital; (b) to turn the Union into a resource-efficient, green and competitive low-carbon economy; (c) to safeguard the Union's citizens from environment-related pressures and risks to health and well-being; (d) to maximise the benefits of Union environment legislation by improving implementation; (e) to improve the knowledge and evidence base for Union environment policy; (f) to secure investment for environment and climate policy and address environmental externalities; (g) to improve environmental integration and policy coherence; (h) to enhance the sustainability of the Union's cities; (i) to increase the Union's effectiveness in addressing inter\u00ad national environmental and climate-related challenges.", + "source_url": "http://existing.com", + "url": null, + "md5_sum": null, + "type": "EU Decision", + "source": "CCLW", + "import_id": "TESTCCLW.executive.3.3", + "category": "Law", + "frameworks": [], + "geography": "EUR", + "hazards": [], + "instruments": [ + "Capacity building|Governance", + "Education, training and knowledge dissemination|Information" + ], + "keywords": [ + "Adaptation", + "Institutions / Administrative Arrangements", + "Research And Development", + "Energy Supply", + "Energy Demand", + "REDD+ And LULUCF", + "Transport" + ], + "languages": [ + "English" + ], + "sectors": [ + "Economy-wide", + "Health", + "Transport" + ], + "topics": [ + "Adaptation", + "Mitigation" + ], + "events": [ + { + "name": "Law passed", + "description": "", + "created_ts": "2013-11-20T00:00:00" + } + ], + "slug": "european-union_2013_decision-no-13862013eu-of-the-european-parliament-and-of-the-council-of-20-november-2013-on-a-general-union-environment-action-programme-to-2020-living-well-within-the-limits-of-our-planet_8570_3017" + }, + { + "publication_ts": "2013-01-01T00:00:00", + "name": "DECISION No 1386/2013/EU OF THE EUROPEAN PARLIAMENT AND OF THE COUNCIL of 
20 November 2013 on a General Union Environment Action Programme to 2020 \u2018Living well, within the limits of our planet\u2019", + "description": "description", + "source_url": "http://existing.com", + "url": null, + "md5_sum": null, + "type": "EU Decision", + "source": "CCLW", + "import_id": "TESTCCLW.executive.4.4", + "category": "Law", + "frameworks": [], + "geography": "EUR", + "hazards": [], + "instruments": [ + "Capacity building|Governance", + "Education, training and knowledge dissemination|Information" + ], + "keywords": [ + "Adaptation", + "Institutions / Administrative Arrangements", + "Research And Development", + "Energy Supply", + "Energy Demand", + "REDD+ And LULUCF", + "Transport" + ], + "languages": [ + "English" + ], + "sectors": [ + "Economy-wide", + "Health", + "Transport" + ], + "topics": [ + "Adaptation", + "Mitigation" + ], + "events": [ + { + "name": "Law passed", + "description": "", + "created_ts": "2013-11-20T00:00:00" + } + ], + "slug": "european-union_2013_decision-no-13862013eu-of-the-european-parliament-and-of-the-council-of-20-november-2013-on-a-general-union-environment-action-programme-to-2020-living-well-within-the-limits-of-our-planet_8570_3017" } ], "updated_documents": { diff --git a/integration_tests/data/pipeline_out/archive/ingest_unit_test_embeddings_input/TESTCCLW.executive.3.3/2023-03-29-17-29-45..json b/integration_tests/data/pipeline_out/archive/ingest_unit_test_embeddings_input/TESTCCLW.executive.3.3/2023-03-29-17-29-45..json deleted file mode 100644 index 3daf5f83..00000000 --- a/integration_tests/data/pipeline_out/archive/ingest_unit_test_embeddings_input/TESTCCLW.executive.3.3/2023-03-29-17-29-45..json +++ /dev/null @@ -1 +0,0 @@ -{"document_name": "name", "document_description": "description", "document_id": "TESTCCLW.executive.3.3", "document_source_url": "http://new.com", "document_cdn_object": null, "document_content_type": "text/html", "document_md5_sum": null, "document_metadata": {}, "document_slug": 
"fake_slug", "languages": ["en"], "translated": false, "html_data": {"detected_title": "One Stop Shop Service", "detected_date": null, "has_valid_text": true, "text_blocks": [{"text": ["Why use a One Stop Shop"], "text_block_id": "b0", "language": "en", "type": "Text", "type_confidence": 1.0}]}, "pdf_data": null} \ No newline at end of file diff --git a/integration_tests/data/pipeline_out/archive/ingest_unit_test_embeddings_input/TESTCCLW.executive.3.3/2023-04-12-13-01-01..json b/integration_tests/data/pipeline_out/archive/ingest_unit_test_embeddings_input/TESTCCLW.executive.3.3/2023-04-12-13-01-01..json new file mode 100644 index 00000000..3199a55f --- /dev/null +++ b/integration_tests/data/pipeline_out/archive/ingest_unit_test_embeddings_input/TESTCCLW.executive.3.3/2023-04-12-13-01-01..json @@ -0,0 +1,32 @@ +{ + "document_name": "name", + "document_description": "description", + "document_id": "TESTCCLW.executive.3.3", + "document_source_url": "http://existing.com", + "document_cdn_object": null, + "document_content_type": "text/html", + "document_md5_sum": null, + "document_metadata": {}, + "document_slug": "fake_slug", + "languages": [ + "en" + ], + "translated": false, + "html_data": { + "detected_title": "One Stop Shop Service", + "detected_date": null, + "has_valid_text": true, + "text_blocks": [ + { + "text": [ + "Why use a One Stop Shop" + ], + "text_block_id": "b0", + "language": "en", + "type": "Text", + "type_confidence": 1.0 + } + ] + }, + "pdf_data": null +} \ No newline at end of file diff --git a/integration_tests/data/pipeline_out/archive/ingest_unit_test_embeddings_input/TESTCCLW.executive.4.4/2023-03-29-17-29-47..json b/integration_tests/data/pipeline_out/archive/ingest_unit_test_embeddings_input/TESTCCLW.executive.4.4/2023-03-29-17-29-47..json deleted file mode 100644 index a829e058..00000000 --- a/integration_tests/data/pipeline_out/archive/ingest_unit_test_embeddings_input/TESTCCLW.executive.4.4/2023-03-29-17-29-47..json +++ /dev/null @@ -1 
+0,0 @@ -{"document_name": "name", "document_description": "new description", "document_id": "TESTCCLW.executive.4.4", "document_source_url": "http://new.com", "document_cdn_object": null, "document_content_type": "text/html", "document_md5_sum": null, "document_metadata": {}, "document_slug": "fake_slug", "languages": ["en"], "translated": false, "html_data": {"detected_title": "One Stop Shop Service", "detected_date": null, "has_valid_text": true, "text_blocks": [{"text": ["Why use a One Stop Shop"], "text_block_id": "b0", "language": "en", "type": "Text", "type_confidence": 1.0}]}, "pdf_data": null} \ No newline at end of file diff --git a/integration_tests/data/pipeline_out/archive/ingest_unit_test_embeddings_input/TESTCCLW.executive.4.4/2023-04-12-13-01-06..json b/integration_tests/data/pipeline_out/archive/ingest_unit_test_embeddings_input/TESTCCLW.executive.4.4/2023-04-12-13-01-06..json new file mode 100644 index 00000000..cbe861b6 --- /dev/null +++ b/integration_tests/data/pipeline_out/archive/ingest_unit_test_embeddings_input/TESTCCLW.executive.4.4/2023-04-12-13-01-06..json @@ -0,0 +1 @@ +{"document_name": "name", "document_description": "new description", "document_id": "TESTCCLW.executive.4.4", "document_source_url": "http://existing.com", "document_cdn_object": null, "document_content_type": "text/html", "document_md5_sum": null, "document_metadata": {}, "document_slug": "fake_slug", "languages": ["en"], "translated": false, "html_data": {"detected_title": "One Stop Shop Service", "detected_date": null, "has_valid_text": true, "text_blocks": [{"text": ["Why use a One Stop Shop"], "text_block_id": "b0", "language": "en", "type": "Text", "type_confidence": 1.0}]}, "pdf_data": null} \ No newline at end of file diff --git a/integration_tests/data/pipeline_out/archive/ingest_unit_test_indexer_input/TESTCCLW.executive.1.1/2023-03-29-17-29-45.npy 
b/integration_tests/data/pipeline_out/archive/ingest_unit_test_indexer_input/TESTCCLW.executive.1.1/2023-04-12-13-01-05.npy similarity index 100% rename from integration_tests/data/pipeline_out/archive/ingest_unit_test_indexer_input/TESTCCLW.executive.1.1/2023-03-29-17-29-45.npy rename to integration_tests/data/pipeline_out/archive/ingest_unit_test_indexer_input/TESTCCLW.executive.1.1/2023-04-12-13-01-05.npy diff --git a/integration_tests/data/pipeline_out/archive/ingest_unit_test_indexer_input/TESTCCLW.executive.2.2/2023-03-29-17-29-45.npy b/integration_tests/data/pipeline_out/archive/ingest_unit_test_indexer_input/TESTCCLW.executive.2.2/2023-04-12-13-01-05.npy similarity index 100% rename from integration_tests/data/pipeline_out/archive/ingest_unit_test_indexer_input/TESTCCLW.executive.2.2/2023-03-29-17-29-45.npy rename to integration_tests/data/pipeline_out/archive/ingest_unit_test_indexer_input/TESTCCLW.executive.2.2/2023-04-12-13-01-05.npy diff --git a/integration_tests/data/pipeline_out/archive/ingest_unit_test_indexer_input/TESTCCLW.executive.3.3/2023-03-29-17-29-45..json b/integration_tests/data/pipeline_out/archive/ingest_unit_test_indexer_input/TESTCCLW.executive.3.3/2023-03-29-17-29-45..json deleted file mode 100644 index 3daf5f83..00000000 --- a/integration_tests/data/pipeline_out/archive/ingest_unit_test_indexer_input/TESTCCLW.executive.3.3/2023-03-29-17-29-45..json +++ /dev/null @@ -1 +0,0 @@ -{"document_name": "name", "document_description": "description", "document_id": "TESTCCLW.executive.3.3", "document_source_url": "http://new.com", "document_cdn_object": null, "document_content_type": "text/html", "document_md5_sum": null, "document_metadata": {}, "document_slug": "fake_slug", "languages": ["en"], "translated": false, "html_data": {"detected_title": "One Stop Shop Service", "detected_date": null, "has_valid_text": true, "text_blocks": [{"text": ["Why use a One Stop Shop"], "text_block_id": "b0", "language": "en", "type": "Text", 
"type_confidence": 1.0}]}, "pdf_data": null} \ No newline at end of file diff --git a/integration_tests/data/pipeline_out/archive/ingest_unit_test_indexer_input/TESTCCLW.executive.3.3/2023-04-12-13-01-01..json b/integration_tests/data/pipeline_out/archive/ingest_unit_test_indexer_input/TESTCCLW.executive.3.3/2023-04-12-13-01-01..json new file mode 100644 index 00000000..3199a55f --- /dev/null +++ b/integration_tests/data/pipeline_out/archive/ingest_unit_test_indexer_input/TESTCCLW.executive.3.3/2023-04-12-13-01-01..json @@ -0,0 +1,32 @@ +{ + "document_name": "name", + "document_description": "description", + "document_id": "TESTCCLW.executive.3.3", + "document_source_url": "http://existing.com", + "document_cdn_object": null, + "document_content_type": "text/html", + "document_md5_sum": null, + "document_metadata": {}, + "document_slug": "fake_slug", + "languages": [ + "en" + ], + "translated": false, + "html_data": { + "detected_title": "One Stop Shop Service", + "detected_date": null, + "has_valid_text": true, + "text_blocks": [ + { + "text": [ + "Why use a One Stop Shop" + ], + "text_block_id": "b0", + "language": "en", + "type": "Text", + "type_confidence": 1.0 + } + ] + }, + "pdf_data": null +} \ No newline at end of file diff --git a/integration_tests/data/pipeline_out/archive/ingest_unit_test_indexer_input/TESTCCLW.executive.3.3/2023-03-29-17-29-45..npy b/integration_tests/data/pipeline_out/archive/ingest_unit_test_indexer_input/TESTCCLW.executive.3.3/2023-04-12-13-01-01..npy similarity index 100% rename from integration_tests/data/pipeline_out/archive/ingest_unit_test_indexer_input/TESTCCLW.executive.3.3/2023-03-29-17-29-45..npy rename to integration_tests/data/pipeline_out/archive/ingest_unit_test_indexer_input/TESTCCLW.executive.3.3/2023-04-12-13-01-01..npy diff --git a/integration_tests/data/pipeline_out/archive/ingest_unit_test_indexer_input/TESTCCLW.executive.4.4/2023-03-29-17-29-47..json 
b/integration_tests/data/pipeline_out/archive/ingest_unit_test_indexer_input/TESTCCLW.executive.4.4/2023-03-29-17-29-47..json deleted file mode 100644 index a829e058..00000000 --- a/integration_tests/data/pipeline_out/archive/ingest_unit_test_indexer_input/TESTCCLW.executive.4.4/2023-03-29-17-29-47..json +++ /dev/null @@ -1 +0,0 @@ -{"document_name": "name", "document_description": "new description", "document_id": "TESTCCLW.executive.4.4", "document_source_url": "http://new.com", "document_cdn_object": null, "document_content_type": "text/html", "document_md5_sum": null, "document_metadata": {}, "document_slug": "fake_slug", "languages": ["en"], "translated": false, "html_data": {"detected_title": "One Stop Shop Service", "detected_date": null, "has_valid_text": true, "text_blocks": [{"text": ["Why use a One Stop Shop"], "text_block_id": "b0", "language": "en", "type": "Text", "type_confidence": 1.0}]}, "pdf_data": null} \ No newline at end of file diff --git a/integration_tests/data/pipeline_out/archive/ingest_unit_test_indexer_input/TESTCCLW.executive.4.4/2023-03-29-17-29-45.npy b/integration_tests/data/pipeline_out/archive/ingest_unit_test_indexer_input/TESTCCLW.executive.4.4/2023-04-12-13-01-05.npy similarity index 100% rename from integration_tests/data/pipeline_out/archive/ingest_unit_test_indexer_input/TESTCCLW.executive.4.4/2023-03-29-17-29-45.npy rename to integration_tests/data/pipeline_out/archive/ingest_unit_test_indexer_input/TESTCCLW.executive.4.4/2023-04-12-13-01-05.npy diff --git a/integration_tests/data/pipeline_out/archive/ingest_unit_test_indexer_input/TESTCCLW.executive.4.4/2023-04-12-13-01-06..json b/integration_tests/data/pipeline_out/archive/ingest_unit_test_indexer_input/TESTCCLW.executive.4.4/2023-04-12-13-01-06..json new file mode 100644 index 00000000..cbe861b6 --- /dev/null +++ b/integration_tests/data/pipeline_out/archive/ingest_unit_test_indexer_input/TESTCCLW.executive.4.4/2023-04-12-13-01-06..json @@ -0,0 +1 @@ +{"document_name": 
"name", "document_description": "new description", "document_id": "TESTCCLW.executive.4.4", "document_source_url": "http://existing.com", "document_cdn_object": null, "document_content_type": "text/html", "document_md5_sum": null, "document_metadata": {}, "document_slug": "fake_slug", "languages": ["en"], "translated": false, "html_data": {"detected_title": "One Stop Shop Service", "detected_date": null, "has_valid_text": true, "text_blocks": [{"text": ["Why use a One Stop Shop"], "text_block_id": "b0", "language": "en", "type": "Text", "type_confidence": 1.0}]}, "pdf_data": null} \ No newline at end of file diff --git a/integration_tests/data/pipeline_out/archive/ingest_unit_test_parser_input/TESTCCLW.executive.3.3/2023-04-12-13-01-01..json b/integration_tests/data/pipeline_out/archive/ingest_unit_test_parser_input/TESTCCLW.executive.3.3/2023-04-12-13-01-01..json new file mode 100644 index 00000000..70403c76 --- /dev/null +++ b/integration_tests/data/pipeline_out/archive/ingest_unit_test_parser_input/TESTCCLW.executive.3.3/2023-04-12-13-01-01..json @@ -0,0 +1,11 @@ +{ + "document_name": "name", + "document_description": "description", + "document_id": "TESTCCLW.executive.3.3", + "document_source_url": "http://existing.com", + "document_cdn_object": null, + "document_content_type": "text/html", + "document_md5_sum": null, + "document_metadata": {}, + "document_slug": "fake_slug" +} \ No newline at end of file diff --git a/integration_tests/data/pipeline_out/archive/ingest_unit_test_parser_input/TESTCCLW.executive.4.4/2023-04-12-13-01-06..json b/integration_tests/data/pipeline_out/archive/ingest_unit_test_parser_input/TESTCCLW.executive.4.4/2023-04-12-13-01-06..json new file mode 100644 index 00000000..e38493ba --- /dev/null +++ b/integration_tests/data/pipeline_out/archive/ingest_unit_test_parser_input/TESTCCLW.executive.4.4/2023-04-12-13-01-06..json @@ -0,0 +1 @@ +{"document_name": "name", "document_description": "new description", "document_id": 
"TESTCCLW.executive.4.4", "document_source_url": "http://existing.com", "document_cdn_object": null, "document_content_type": "text/html", "document_md5_sum": null, "document_metadata": {}, "document_slug": "fake_slug"} \ No newline at end of file diff --git a/integration_tests/data/pipeline_out/ingest_unit_test_parser_input/TESTCCLW.executive.3.3.json b/integration_tests/data/pipeline_out/ingest_unit_test_parser_input/TESTCCLW.executive.3.3.json index ffe60e54..6f51010c 100644 --- a/integration_tests/data/pipeline_out/ingest_unit_test_parser_input/TESTCCLW.executive.3.3.json +++ b/integration_tests/data/pipeline_out/ingest_unit_test_parser_input/TESTCCLW.executive.3.3.json @@ -1 +1,56 @@ -{"document_name": "name", "document_description": "description", "document_id": "TESTCCLW.executive.3.3", "document_source_url": "http://new.com", "document_cdn_object": null, "document_content_type": "text/html", "document_md5_sum": null, "document_metadata": {}, "document_slug": "fake_slug"} \ No newline at end of file +{ + "document_name": "DECISION No 1386/2013/EU OF THE EUROPEAN PARLIAMENT AND OF THE COUNCIL of 20 November 2013 on a General Union Environment Action Programme to 2020 \u2018Living well, within the limits of our planet\u2019", + "document_description": "The Decision no 1386/2013/EU sets up the General Union Environment Action Programme to 2020 \u2018Living well, within the limits of our planet'. It adopts the '7th Environment Action programme' or \u20187th EAP'. 
The priority objectives of the 7th EAP are: (a) to protect, conserve and enhance the Union's natural capital; (b) to turn the Union into a resource-efficient, green and competitive low-carbon economy; (c) to safeguard the Union's citizens from environment-related pressures and risks to health and well-being; (d) to maximise the benefits of Union environment legislation by improving implementation; (e) to improve the knowledge and evidence base for Union environment policy; (f) to secure investment for environment and climate policy and address environmental externalities; (g) to improve environmental integration and policy coherence; (h) to enhance the sustainability of the Union's cities; (i) to increase the Union's effectiveness in addressing inter\u00ad national environmental and climate-related challenges.", + "document_id": "TESTCCLW.executive.3.3", + "document_source_url": "http://existing.com", + "document_cdn_object": null, + "document_content_type": null, + "document_md5_sum": null, + "document_metadata": { + "name": "DECISION No 1386/2013/EU OF THE EUROPEAN PARLIAMENT AND OF THE COUNCIL of 20 November 2013 on a General Union Environment Action Programme to 2020 \u2018Living well, within the limits of our planet\u2019", + "description": "The Decision no 1386/2013/EU sets up the General Union Environment Action Programme to 2020 \u2018Living well, within the limits of our planet'. It adopts the '7th Environment Action programme' or \u20187th EAP'. 
The priority objectives of the 7th EAP are: (a) to protect, conserve and enhance the Union's natural capital; (b) to turn the Union into a resource-efficient, green and competitive low-carbon economy; (c) to safeguard the Union's citizens from environment-related pressures and risks to health and well-being; (d) to maximise the benefits of Union environment legislation by improving implementation; (e) to improve the knowledge and evidence base for Union environment policy; (f) to secure investment for environment and climate policy and address environmental externalities; (g) to improve environmental integration and policy coherence; (h) to enhance the sustainability of the Union's cities; (i) to increase the Union's effectiveness in addressing inter\u00ad national environmental and climate-related challenges.", + "import_id": "TESTCCLW.executive.3.3", + "slug": "european-union_2013_decision-no-13862013eu-of-the-european-parliament-and-of-the-council-of-20-november-2013-on-a-general-union-environment-action-programme-to-2020-living-well-within-the-limits-of-our-planet_8570_3017", + "publication_ts": "2013-01-01T00:00:00", + "source_url": "http://existing.com", + "type": "EU Decision", + "source": "CCLW", + "category": "Law", + "geography": "EUR", + "frameworks": [], + "instruments": [ + "Capacity building|Governance", + "Education, training and knowledge dissemination|Information" + ], + "hazards": [], + "keywords": [ + "Adaptation", + "Institutions / Administrative Arrangements", + "Research And Development", + "Energy Supply", + "Energy Demand", + "REDD+ And LULUCF", + "Transport" + ], + "languages": [ + "English" + ], + "sectors": [ + "Economy-wide", + "Health", + "Transport" + ], + "topics": [ + "Adaptation", + "Mitigation" + ], + "events": [ + { + "name": "Law passed", + "description": "", + "created_ts": "2013-11-20T00:00:00" + } + ] + }, + "document_slug": 
"european-union_2013_decision-no-13862013eu-of-the-european-parliament-and-of-the-council-of-20-november-2013-on-a-general-union-environment-action-programme-to-2020-living-well-within-the-limits-of-our-planet_8570_3017" +} \ No newline at end of file diff --git a/integration_tests/data/pipeline_out/ingest_unit_test_parser_input/TESTCCLW.executive.4.4.json b/integration_tests/data/pipeline_out/ingest_unit_test_parser_input/TESTCCLW.executive.4.4.json index 9e280b63..77aaeb85 100644 --- a/integration_tests/data/pipeline_out/ingest_unit_test_parser_input/TESTCCLW.executive.4.4.json +++ b/integration_tests/data/pipeline_out/ingest_unit_test_parser_input/TESTCCLW.executive.4.4.json @@ -1 +1,56 @@ -{"document_name": "name", "document_description": "new description", "document_id": "TESTCCLW.executive.4.4", "document_source_url": "http://new.com", "document_cdn_object": null, "document_content_type": "text/html", "document_md5_sum": null, "document_metadata": {}, "document_slug": "fake_slug"} \ No newline at end of file +{ + "document_name": "DECISION No 1386/2013/EU OF THE EUROPEAN PARLIAMENT AND OF THE COUNCIL of 20 November 2013 on a General Union Environment Action Programme to 2020 \u2018Living well, within the limits of our planet\u2019", + "document_description": "description", + "document_id": "TESTCCLW.executive.4.4", + "document_source_url": "http://existing.com", + "document_cdn_object": null, + "document_content_type": null, + "document_md5_sum": null, + "document_metadata": { + "name": "DECISION No 1386/2013/EU OF THE EUROPEAN PARLIAMENT AND OF THE COUNCIL of 20 November 2013 on a General Union Environment Action Programme to 2020 \u2018Living well, within the limits of our planet\u2019", + "description": "description", + "import_id": "TESTCCLW.executive.4.4", + "slug": 
"european-union_2013_decision-no-13862013eu-of-the-european-parliament-and-of-the-council-of-20-november-2013-on-a-general-union-environment-action-programme-to-2020-living-well-within-the-limits-of-our-planet_8570_3017", + "publication_ts": "2013-01-01T00:00:00", + "source_url": "http://existing.com", + "type": "EU Decision", + "source": "CCLW", + "category": "Law", + "geography": "EUR", + "frameworks": [], + "instruments": [ + "Capacity building|Governance", + "Education, training and knowledge dissemination|Information" + ], + "hazards": [], + "keywords": [ + "Adaptation", + "Institutions / Administrative Arrangements", + "Research And Development", + "Energy Supply", + "Energy Demand", + "REDD+ And LULUCF", + "Transport" + ], + "languages": [ + "English" + ], + "sectors": [ + "Economy-wide", + "Health", + "Transport" + ], + "topics": [ + "Adaptation", + "Mitigation" + ], + "events": [ + { + "name": "Law passed", + "description": "", + "created_ts": "2013-11-20T00:00:00" + } + ] + }, + "document_slug": "european-union_2013_decision-no-13862013eu-of-the-european-parliament-and-of-the-council-of-20-november-2013-on-a-general-union-environment-action-programme-to-2020-living-well-within-the-limits-of-our-planet_8570_3017" +} \ No newline at end of file diff --git a/integration_tests/data/pipeline_out/input/new_and_updated_documents.json b/integration_tests/data/pipeline_out/input/new_and_updated_documents.json index 59670d13..c2f28066 100644 --- a/integration_tests/data/pipeline_out/input/new_and_updated_documents.json +++ b/integration_tests/data/pipeline_out/input/new_and_updated_documents.json @@ -838,6 +838,102 @@ } ], "slug": "european-union_2013_decision-no-13862013eu-of-the-european-parliament-and-of-the-council-of-20-november-2013-on-a-general-union-environment-action-programme-to-2020-living-well-within-the-limits-of-our-planet_8570_3017" + }, + { + "publication_ts": "2013-01-01T00:00:00", + "name": "DECISION No 1386/2013/EU OF THE EUROPEAN PARLIAMENT AND 
OF THE COUNCIL of 20 November 2013 on a General Union Environment Action Programme to 2020 \u2018Living well, within the limits of our planet\u2019", + "description": "The Decision no 1386/2013/EU sets up the General Union Environment Action Programme to 2020 \u2018Living well, within the limits of our planet'. It adopts the '7th Environment Action programme' or \u20187th EAP'. The priority objectives of the 7th EAP are: (a) to protect, conserve and enhance the Union's natural capital; (b) to turn the Union into a resource-efficient, green and competitive low-carbon economy; (c) to safeguard the Union's citizens from environment-related pressures and risks to health and well-being; (d) to maximise the benefits of Union environment legislation by improving implementation; (e) to improve the knowledge and evidence base for Union environment policy; (f) to secure investment for environment and climate policy and address environmental externalities; (g) to improve environmental integration and policy coherence; (h) to enhance the sustainability of the Union's cities; (i) to increase the Union's effectiveness in addressing inter\u00ad national environmental and climate-related challenges.", + "source_url": "http://existing.com", + "url": null, + "md5_sum": null, + "type": "EU Decision", + "source": "CCLW", + "import_id": "TESTCCLW.executive.3.3", + "category": "Law", + "frameworks": [], + "geography": "EUR", + "hazards": [], + "instruments": [ + "Capacity building|Governance", + "Education, training and knowledge dissemination|Information" + ], + "keywords": [ + "Adaptation", + "Institutions / Administrative Arrangements", + "Research And Development", + "Energy Supply", + "Energy Demand", + "REDD+ And LULUCF", + "Transport" + ], + "languages": [ + "English" + ], + "sectors": [ + "Economy-wide", + "Health", + "Transport" + ], + "topics": [ + "Adaptation", + "Mitigation" + ], + "events": [ + { + "name": "Law passed", + "description": "", + "created_ts": 
"2013-11-20T00:00:00" + } + ], + "slug": "european-union_2013_decision-no-13862013eu-of-the-european-parliament-and-of-the-council-of-20-november-2013-on-a-general-union-environment-action-programme-to-2020-living-well-within-the-limits-of-our-planet_8570_3017" + }, + { + "publication_ts": "2013-01-01T00:00:00", + "name": "DECISION No 1386/2013/EU OF THE EUROPEAN PARLIAMENT AND OF THE COUNCIL of 20 November 2013 on a General Union Environment Action Programme to 2020 \u2018Living well, within the limits of our planet\u2019", + "description": "description", + "source_url": "http://existing.com", + "url": null, + "md5_sum": null, + "type": "EU Decision", + "source": "CCLW", + "import_id": "TESTCCLW.executive.4.4", + "category": "Law", + "frameworks": [], + "geography": "EUR", + "hazards": [], + "instruments": [ + "Capacity building|Governance", + "Education, training and knowledge dissemination|Information" + ], + "keywords": [ + "Adaptation", + "Institutions / Administrative Arrangements", + "Research And Development", + "Energy Supply", + "Energy Demand", + "REDD+ And LULUCF", + "Transport" + ], + "languages": [ + "English" + ], + "sectors": [ + "Economy-wide", + "Health", + "Transport" + ], + "topics": [ + "Adaptation", + "Mitigation" + ], + "events": [ + { + "name": "Law passed", + "description": "", + "created_ts": "2013-11-20T00:00:00" + } + ], + "slug": "european-union_2013_decision-no-13862013eu-of-the-european-parliament-and-of-the-council-of-20-november-2013-on-a-general-union-environment-action-programme-to-2020-living-well-within-the-limits-of-our-planet_8570_3017" } ], "updated_documents": { diff --git a/integration_tests/data/pipeline_out/input/new_and_updated_documents.json_errors b/integration_tests/data/pipeline_out/input/new_and_updated_documents.json_errors index 2483a00c..2b5e34f6 100644 --- a/integration_tests/data/pipeline_out/input/new_and_updated_documents.json_errors +++ 
b/integration_tests/data/pipeline_out/input/new_and_updated_documents.json_errors @@ -1,18 +1,20 @@ [ - "ERROR ingesting 'TESTCCLW.executive.1332.1548': Traceback (most recent call last):\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 382, in __call__\n result = fn(*args, **kwargs)\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 175, in update_document_details\n token = get_machine_user_token()\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 49, in get_machine_user_token\n get_token_response = requests.post(f\"{api_host}/api/tokens\", data=login_data)\n File \"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 115, in post\n return request(\"post\", url, data=data, json=json, **kwargs)\n File \"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 59, in request\n return session.request(method=method, url=url, **kwargs)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 573, in request\n prep = self.prepare_request(req)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 484, in prepare_request\n p.prepare(\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 368, in prepare\n self.prepare_url(url, params)\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 439, in prepare_url\n raise MissingSchema(\nrequests.exceptions.MissingSchema: Invalid URL '/api/tokens': No scheme supplied. 
Perhaps you meant https:///api/tokens?\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/new_document_actions.py\", line 142, in _handle_document\n update_document_details(\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 289, in wrapped_f\n return self(f, *args, **kw)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 379, in __call__\n do = self.iter(retry_state=retry_state)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 326, in iter\n raise retry_exc from fut.exception()\ntenacity.RetryError: RetryError[]\n", - "ERROR ingesting 'TESTCCLW.executive.1332.1547': Traceback (most recent call last):\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 382, in __call__\n result = fn(*args, **kwargs)\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 175, in update_document_details\n token = get_machine_user_token()\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 49, in get_machine_user_token\n get_token_response = requests.post(f\"{api_host}/api/tokens\", data=login_data)\n File \"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 115, in post\n return request(\"post\", url, data=data, json=json, **kwargs)\n File \"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 59, in request\n return session.request(method=method, url=url, **kwargs)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 573, in request\n prep = self.prepare_request(req)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 484, in prepare_request\n p.prepare(\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 368, in prepare\n self.prepare_url(url, params)\n File 
\"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 439, in prepare_url\n raise MissingSchema(\nrequests.exceptions.MissingSchema: Invalid URL '/api/tokens': No scheme supplied. Perhaps you meant https:///api/tokens?\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/new_document_actions.py\", line 142, in _handle_document\n update_document_details(\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 289, in wrapped_f\n return self(f, *args, **kw)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 379, in __call__\n do = self.iter(retry_state=retry_state)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 326, in iter\n raise retry_exc from fut.exception()\ntenacity.RetryError: RetryError[]\n", - "ERROR ingesting 'TESTCCLW.executive.1332.1549': Traceback (most recent call last):\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 382, in __call__\n result = fn(*args, **kwargs)\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 175, in update_document_details\n token = get_machine_user_token()\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 49, in get_machine_user_token\n get_token_response = requests.post(f\"{api_host}/api/tokens\", data=login_data)\n File \"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 115, in post\n return request(\"post\", url, data=data, json=json, **kwargs)\n File \"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 59, in request\n return session.request(method=method, url=url, **kwargs)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 573, in request\n prep = self.prepare_request(req)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 484, in 
prepare_request\n p.prepare(\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 368, in prepare\n self.prepare_url(url, params)\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 439, in prepare_url\n raise MissingSchema(\nrequests.exceptions.MissingSchema: Invalid URL '/api/tokens': No scheme supplied. Perhaps you meant https:///api/tokens?\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/new_document_actions.py\", line 142, in _handle_document\n update_document_details(\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 289, in wrapped_f\n return self(f, *args, **kw)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 379, in __call__\n do = self.iter(retry_state=retry_state)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 326, in iter\n raise retry_exc from fut.exception()\ntenacity.RetryError: RetryError[]\n", - "ERROR ingesting 'TESTCCLW.executive.1332.1550': Traceback (most recent call last):\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 382, in __call__\n result = fn(*args, **kwargs)\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 175, in update_document_details\n token = get_machine_user_token()\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 49, in get_machine_user_token\n get_token_response = requests.post(f\"{api_host}/api/tokens\", data=login_data)\n File \"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 115, in post\n return request(\"post\", url, data=data, json=json, **kwargs)\n File \"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 59, in request\n return session.request(method=method, url=url, **kwargs)\n File 
\"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 573, in request\n prep = self.prepare_request(req)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 484, in prepare_request\n p.prepare(\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 368, in prepare\n self.prepare_url(url, params)\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 439, in prepare_url\n raise MissingSchema(\nrequests.exceptions.MissingSchema: Invalid URL '/api/tokens': No scheme supplied. Perhaps you meant https:///api/tokens?\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/new_document_actions.py\", line 142, in _handle_document\n update_document_details(\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 289, in wrapped_f\n return self(f, *args, **kw)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 379, in __call__\n do = self.iter(retry_state=retry_state)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 326, in iter\n raise retry_exc from fut.exception()\ntenacity.RetryError: RetryError[]\n", - "ERROR ingesting 'TESTCCLW.executive.1332.1551': Traceback (most recent call last):\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 382, in __call__\n result = fn(*args, **kwargs)\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 175, in update_document_details\n token = get_machine_user_token()\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 49, in get_machine_user_token\n get_token_response = requests.post(f\"{api_host}/api/tokens\", data=login_data)\n File \"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 115, in post\n return request(\"post\", url, data=data, json=json, **kwargs)\n File 
\"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 59, in request\n return session.request(method=method, url=url, **kwargs)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 573, in request\n prep = self.prepare_request(req)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 484, in prepare_request\n p.prepare(\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 368, in prepare\n self.prepare_url(url, params)\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 439, in prepare_url\n raise MissingSchema(\nrequests.exceptions.MissingSchema: Invalid URL '/api/tokens': No scheme supplied. Perhaps you meant https:///api/tokens?\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/new_document_actions.py\", line 142, in _handle_document\n update_document_details(\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 289, in wrapped_f\n return self(f, *args, **kw)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 379, in __call__\n do = self.iter(retry_state=retry_state)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 326, in iter\n raise retry_exc from fut.exception()\ntenacity.RetryError: RetryError[]\n", - "ERROR ingesting 'TESTCCLW.executive.1332.1554': Traceback (most recent call last):\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 382, in __call__\n result = fn(*args, **kwargs)\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 175, in update_document_details\n token = get_machine_user_token()\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 49, in get_machine_user_token\n get_token_response = requests.post(f\"{api_host}/api/tokens\", data=login_data)\n File 
\"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 115, in post\n return request(\"post\", url, data=data, json=json, **kwargs)\n File \"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 59, in request\n return session.request(method=method, url=url, **kwargs)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 573, in request\n prep = self.prepare_request(req)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 484, in prepare_request\n p.prepare(\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 368, in prepare\n self.prepare_url(url, params)\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 439, in prepare_url\n raise MissingSchema(\nrequests.exceptions.MissingSchema: Invalid URL '/api/tokens': No scheme supplied. Perhaps you meant https:///api/tokens?\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/new_document_actions.py\", line 142, in _handle_document\n update_document_details(\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 289, in wrapped_f\n return self(f, *args, **kw)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 379, in __call__\n do = self.iter(retry_state=retry_state)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 326, in iter\n raise retry_exc from fut.exception()\ntenacity.RetryError: RetryError[]\n", - "ERROR ingesting 'TESTCCLW.executive.1332.1553': Traceback (most recent call last):\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 382, in __call__\n result = fn(*args, **kwargs)\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 175, in update_document_details\n token = get_machine_user_token()\n File 
\"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 49, in get_machine_user_token\n get_token_response = requests.post(f\"{api_host}/api/tokens\", data=login_data)\n File \"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 115, in post\n return request(\"post\", url, data=data, json=json, **kwargs)\n File \"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 59, in request\n return session.request(method=method, url=url, **kwargs)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 573, in request\n prep = self.prepare_request(req)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 484, in prepare_request\n p.prepare(\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 368, in prepare\n self.prepare_url(url, params)\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 439, in prepare_url\n raise MissingSchema(\nrequests.exceptions.MissingSchema: Invalid URL '/api/tokens': No scheme supplied. 
Perhaps you meant https:///api/tokens?\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/new_document_actions.py\", line 142, in _handle_document\n update_document_details(\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 289, in wrapped_f\n return self(f, *args, **kw)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 379, in __call__\n do = self.iter(retry_state=retry_state)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 326, in iter\n raise retry_exc from fut.exception()\ntenacity.RetryError: RetryError[]\n", - "ERROR ingesting 'TESTCCLW.executive.1332.1555': Traceback (most recent call last):\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 382, in __call__\n result = fn(*args, **kwargs)\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 175, in update_document_details\n token = get_machine_user_token()\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 49, in get_machine_user_token\n get_token_response = requests.post(f\"{api_host}/api/tokens\", data=login_data)\n File \"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 115, in post\n return request(\"post\", url, data=data, json=json, **kwargs)\n File \"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 59, in request\n return session.request(method=method, url=url, **kwargs)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 573, in request\n prep = self.prepare_request(req)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 484, in prepare_request\n p.prepare(\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 368, in prepare\n self.prepare_url(url, params)\n File 
\"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 439, in prepare_url\n raise MissingSchema(\nrequests.exceptions.MissingSchema: Invalid URL '/api/tokens': No scheme supplied. Perhaps you meant https:///api/tokens?\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/new_document_actions.py\", line 142, in _handle_document\n update_document_details(\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 289, in wrapped_f\n return self(f, *args, **kw)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 379, in __call__\n do = self.iter(retry_state=retry_state)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 326, in iter\n raise retry_exc from fut.exception()\ntenacity.RetryError: RetryError[]\n", - "ERROR ingesting 'TESTCCLW.executive.1332.1552': Traceback (most recent call last):\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 382, in __call__\n result = fn(*args, **kwargs)\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 175, in update_document_details\n token = get_machine_user_token()\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 49, in get_machine_user_token\n get_token_response = requests.post(f\"{api_host}/api/tokens\", data=login_data)\n File \"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 115, in post\n return request(\"post\", url, data=data, json=json, **kwargs)\n File \"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 59, in request\n return session.request(method=method, url=url, **kwargs)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 573, in request\n prep = self.prepare_request(req)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 484, in 
prepare_request\n p.prepare(\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 368, in prepare\n self.prepare_url(url, params)\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 439, in prepare_url\n raise MissingSchema(\nrequests.exceptions.MissingSchema: Invalid URL '/api/tokens': No scheme supplied. Perhaps you meant https:///api/tokens?\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/new_document_actions.py\", line 142, in _handle_document\n update_document_details(\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 289, in wrapped_f\n return self(f, *args, **kw)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 379, in __call__\n do = self.iter(retry_state=retry_state)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 326, in iter\n raise retry_exc from fut.exception()\ntenacity.RetryError: RetryError[]\n", - "ERROR ingesting 'TESTCCLW.executive.1332.1556': Traceback (most recent call last):\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 382, in __call__\n result = fn(*args, **kwargs)\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 175, in update_document_details\n token = get_machine_user_token()\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 49, in get_machine_user_token\n get_token_response = requests.post(f\"{api_host}/api/tokens\", data=login_data)\n File \"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 115, in post\n return request(\"post\", url, data=data, json=json, **kwargs)\n File \"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 59, in request\n return session.request(method=method, url=url, **kwargs)\n File 
\"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 573, in request\n prep = self.prepare_request(req)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 484, in prepare_request\n p.prepare(\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 368, in prepare\n self.prepare_url(url, params)\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 439, in prepare_url\n raise MissingSchema(\nrequests.exceptions.MissingSchema: Invalid URL '/api/tokens': No scheme supplied. Perhaps you meant https:///api/tokens?\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/new_document_actions.py\", line 142, in _handle_document\n update_document_details(\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 289, in wrapped_f\n return self(f, *args, **kw)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 379, in __call__\n do = self.iter(retry_state=retry_state)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 326, in iter\n raise retry_exc from fut.exception()\ntenacity.RetryError: RetryError[]\n", - "ERROR ingesting 'TESTCCLW.executive.1332.1559': Traceback (most recent call last):\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 382, in __call__\n result = fn(*args, **kwargs)\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 175, in update_document_details\n token = get_machine_user_token()\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 49, in get_machine_user_token\n get_token_response = requests.post(f\"{api_host}/api/tokens\", data=login_data)\n File \"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 115, in post\n return request(\"post\", url, data=data, json=json, **kwargs)\n File 
\"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 59, in request\n return session.request(method=method, url=url, **kwargs)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 573, in request\n prep = self.prepare_request(req)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 484, in prepare_request\n p.prepare(\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 368, in prepare\n self.prepare_url(url, params)\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 439, in prepare_url\n raise MissingSchema(\nrequests.exceptions.MissingSchema: Invalid URL '/api/tokens': No scheme supplied. Perhaps you meant https:///api/tokens?\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/new_document_actions.py\", line 142, in _handle_document\n update_document_details(\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 289, in wrapped_f\n return self(f, *args, **kw)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 379, in __call__\n do = self.iter(retry_state=retry_state)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 326, in iter\n raise retry_exc from fut.exception()\ntenacity.RetryError: RetryError[]\n", - "ERROR ingesting 'TESTCCLW.executive.1332.1562': Traceback (most recent call last):\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 382, in __call__\n result = fn(*args, **kwargs)\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 175, in update_document_details\n token = get_machine_user_token()\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 49, in get_machine_user_token\n get_token_response = requests.post(f\"{api_host}/api/tokens\", data=login_data)\n File 
\"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 115, in post\n return request(\"post\", url, data=data, json=json, **kwargs)\n File \"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 59, in request\n return session.request(method=method, url=url, **kwargs)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 573, in request\n prep = self.prepare_request(req)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 484, in prepare_request\n p.prepare(\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 368, in prepare\n self.prepare_url(url, params)\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 439, in prepare_url\n raise MissingSchema(\nrequests.exceptions.MissingSchema: Invalid URL '/api/tokens': No scheme supplied. Perhaps you meant https:///api/tokens?\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/new_document_actions.py\", line 142, in _handle_document\n update_document_details(\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 289, in wrapped_f\n return self(f, *args, **kw)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 379, in __call__\n do = self.iter(retry_state=retry_state)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 326, in iter\n raise retry_exc from fut.exception()\ntenacity.RetryError: RetryError[]\n", - "ERROR ingesting 'TESTCCLW.executive.1332.1557': Traceback (most recent call last):\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 382, in __call__\n result = fn(*args, **kwargs)\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 175, in update_document_details\n token = get_machine_user_token()\n File 
\"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 49, in get_machine_user_token\n get_token_response = requests.post(f\"{api_host}/api/tokens\", data=login_data)\n File \"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 115, in post\n return request(\"post\", url, data=data, json=json, **kwargs)\n File \"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 59, in request\n return session.request(method=method, url=url, **kwargs)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 573, in request\n prep = self.prepare_request(req)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 484, in prepare_request\n p.prepare(\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 368, in prepare\n self.prepare_url(url, params)\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 439, in prepare_url\n raise MissingSchema(\nrequests.exceptions.MissingSchema: Invalid URL '/api/tokens': No scheme supplied. 
Perhaps you meant https:///api/tokens?\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/new_document_actions.py\", line 142, in _handle_document\n update_document_details(\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 289, in wrapped_f\n return self(f, *args, **kw)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 379, in __call__\n do = self.iter(retry_state=retry_state)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 326, in iter\n raise retry_exc from fut.exception()\ntenacity.RetryError: RetryError[]\n", - "ERROR ingesting 'TESTCCLW.executive.1332.1558': Traceback (most recent call last):\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 382, in __call__\n result = fn(*args, **kwargs)\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 175, in update_document_details\n token = get_machine_user_token()\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 49, in get_machine_user_token\n get_token_response = requests.post(f\"{api_host}/api/tokens\", data=login_data)\n File \"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 115, in post\n return request(\"post\", url, data=data, json=json, **kwargs)\n File \"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 59, in request\n return session.request(method=method, url=url, **kwargs)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 573, in request\n prep = self.prepare_request(req)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 484, in prepare_request\n p.prepare(\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 368, in prepare\n self.prepare_url(url, params)\n File 
\"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 439, in prepare_url\n raise MissingSchema(\nrequests.exceptions.MissingSchema: Invalid URL '/api/tokens': No scheme supplied. Perhaps you meant https:///api/tokens?\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/new_document_actions.py\", line 142, in _handle_document\n update_document_details(\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 289, in wrapped_f\n return self(f, *args, **kw)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 379, in __call__\n do = self.iter(retry_state=retry_state)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 326, in iter\n raise retry_exc from fut.exception()\ntenacity.RetryError: RetryError[]\n", - "ERROR ingesting 'TESTCCLW.executive.1332.1560': Traceback (most recent call last):\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 382, in __call__\n result = fn(*args, **kwargs)\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 175, in update_document_details\n token = get_machine_user_token()\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 49, in get_machine_user_token\n get_token_response = requests.post(f\"{api_host}/api/tokens\", data=login_data)\n File \"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 115, in post\n return request(\"post\", url, data=data, json=json, **kwargs)\n File \"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 59, in request\n return session.request(method=method, url=url, **kwargs)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 573, in request\n prep = self.prepare_request(req)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 484, in 
prepare_request\n p.prepare(\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 368, in prepare\n self.prepare_url(url, params)\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 439, in prepare_url\n raise MissingSchema(\nrequests.exceptions.MissingSchema: Invalid URL '/api/tokens': No scheme supplied. Perhaps you meant https:///api/tokens?\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/new_document_actions.py\", line 142, in _handle_document\n update_document_details(\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 289, in wrapped_f\n return self(f, *args, **kw)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 379, in __call__\n do = self.iter(retry_state=retry_state)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 326, in iter\n raise retry_exc from fut.exception()\ntenacity.RetryError: RetryError[]\n", - "ERROR ingesting 'TESTCCLW.executive.1332.1563': Traceback (most recent call last):\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 382, in __call__\n result = fn(*args, **kwargs)\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 175, in update_document_details\n token = get_machine_user_token()\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 49, in get_machine_user_token\n get_token_response = requests.post(f\"{api_host}/api/tokens\", data=login_data)\n File \"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 115, in post\n return request(\"post\", url, data=data, json=json, **kwargs)\n File \"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 59, in request\n return session.request(method=method, url=url, **kwargs)\n File 
\"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 573, in request\n prep = self.prepare_request(req)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 484, in prepare_request\n p.prepare(\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 368, in prepare\n self.prepare_url(url, params)\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 439, in prepare_url\n raise MissingSchema(\nrequests.exceptions.MissingSchema: Invalid URL '/api/tokens': No scheme supplied. Perhaps you meant https:///api/tokens?\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/new_document_actions.py\", line 142, in _handle_document\n update_document_details(\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 289, in wrapped_f\n return self(f, *args, **kw)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 379, in __call__\n do = self.iter(retry_state=retry_state)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 326, in iter\n raise retry_exc from fut.exception()\ntenacity.RetryError: RetryError[]\n" + "ERROR ingesting 'TESTCCLW.executive.1332.1550': Traceback (most recent call last):\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 382, in __call__\n result = fn(*args, **kwargs)\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 175, in update_document_details\n token = get_machine_user_token()\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 49, in get_machine_user_token\n get_token_response = requests.post(f\"{api_host}/api/tokens\", data=login_data)\n File \"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 115, in post\n return request(\"post\", url, data=data, json=json, **kwargs)\n File 
\"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 59, in request\n return session.request(method=method, url=url, **kwargs)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 573, in request\n prep = self.prepare_request(req)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 484, in prepare_request\n p.prepare(\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 368, in prepare\n self.prepare_url(url, params)\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 439, in prepare_url\n raise MissingSchema(\nrequests.exceptions.MissingSchema: Invalid URL '/api/tokens': No scheme supplied. Perhaps you meant https:///api/tokens?\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/new_document_actions.py\", line 142, in _handle_document\n update_document_details(\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 289, in wrapped_f\n return self(f, *args, **kw)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 379, in __call__\n do = self.iter(retry_state=retry_state)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 326, in iter\n raise retry_exc from fut.exception()\ntenacity.RetryError: RetryError[]\n", + "ERROR ingesting 'TESTCCLW.executive.1332.1547': Traceback (most recent call last):\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 382, in __call__\n result = fn(*args, **kwargs)\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 175, in update_document_details\n token = get_machine_user_token()\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 49, in get_machine_user_token\n get_token_response = requests.post(f\"{api_host}/api/tokens\", data=login_data)\n File 
\"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 115, in post\n return request(\"post\", url, data=data, json=json, **kwargs)\n File \"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 59, in request\n return session.request(method=method, url=url, **kwargs)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 573, in request\n prep = self.prepare_request(req)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 484, in prepare_request\n p.prepare(\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 368, in prepare\n self.prepare_url(url, params)\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 439, in prepare_url\n raise MissingSchema(\nrequests.exceptions.MissingSchema: Invalid URL '/api/tokens': No scheme supplied. Perhaps you meant https:///api/tokens?\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/new_document_actions.py\", line 142, in _handle_document\n update_document_details(\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 289, in wrapped_f\n return self(f, *args, **kw)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 379, in __call__\n do = self.iter(retry_state=retry_state)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 326, in iter\n raise retry_exc from fut.exception()\ntenacity.RetryError: RetryError[]\n", + "ERROR ingesting 'TESTCCLW.executive.1332.1549': Traceback (most recent call last):\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 382, in __call__\n result = fn(*args, **kwargs)\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 175, in update_document_details\n token = get_machine_user_token()\n File 
\"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 49, in get_machine_user_token\n get_token_response = requests.post(f\"{api_host}/api/tokens\", data=login_data)\n File \"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 115, in post\n return request(\"post\", url, data=data, json=json, **kwargs)\n File \"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 59, in request\n return session.request(method=method, url=url, **kwargs)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 573, in request\n prep = self.prepare_request(req)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 484, in prepare_request\n p.prepare(\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 368, in prepare\n self.prepare_url(url, params)\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 439, in prepare_url\n raise MissingSchema(\nrequests.exceptions.MissingSchema: Invalid URL '/api/tokens': No scheme supplied. 
Perhaps you meant https:///api/tokens?\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/new_document_actions.py\", line 142, in _handle_document\n update_document_details(\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 289, in wrapped_f\n return self(f, *args, **kw)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 379, in __call__\n do = self.iter(retry_state=retry_state)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 326, in iter\n raise retry_exc from fut.exception()\ntenacity.RetryError: RetryError[]\n", + "ERROR ingesting 'TESTCCLW.executive.1332.1548': Traceback (most recent call last):\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 382, in __call__\n result = fn(*args, **kwargs)\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 175, in update_document_details\n token = get_machine_user_token()\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 49, in get_machine_user_token\n get_token_response = requests.post(f\"{api_host}/api/tokens\", data=login_data)\n File \"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 115, in post\n return request(\"post\", url, data=data, json=json, **kwargs)\n File \"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 59, in request\n return session.request(method=method, url=url, **kwargs)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 573, in request\n prep = self.prepare_request(req)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 484, in prepare_request\n p.prepare(\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 368, in prepare\n self.prepare_url(url, params)\n File 
\"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 439, in prepare_url\n raise MissingSchema(\nrequests.exceptions.MissingSchema: Invalid URL '/api/tokens': No scheme supplied. Perhaps you meant https:///api/tokens?\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/new_document_actions.py\", line 142, in _handle_document\n update_document_details(\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 289, in wrapped_f\n return self(f, *args, **kw)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 379, in __call__\n do = self.iter(retry_state=retry_state)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 326, in iter\n raise retry_exc from fut.exception()\ntenacity.RetryError: RetryError[]\n", + "ERROR ingesting 'TESTCCLW.executive.1332.1553': Traceback (most recent call last):\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 382, in __call__\n result = fn(*args, **kwargs)\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 175, in update_document_details\n token = get_machine_user_token()\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 49, in get_machine_user_token\n get_token_response = requests.post(f\"{api_host}/api/tokens\", data=login_data)\n File \"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 115, in post\n return request(\"post\", url, data=data, json=json, **kwargs)\n File \"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 59, in request\n return session.request(method=method, url=url, **kwargs)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 573, in request\n prep = self.prepare_request(req)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 484, in 
prepare_request\n p.prepare(\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 368, in prepare\n self.prepare_url(url, params)\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 439, in prepare_url\n raise MissingSchema(\nrequests.exceptions.MissingSchema: Invalid URL '/api/tokens': No scheme supplied. Perhaps you meant https:///api/tokens?\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/new_document_actions.py\", line 142, in _handle_document\n update_document_details(\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 289, in wrapped_f\n return self(f, *args, **kw)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 379, in __call__\n do = self.iter(retry_state=retry_state)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 326, in iter\n raise retry_exc from fut.exception()\ntenacity.RetryError: RetryError[]\n", + "ERROR ingesting 'TESTCCLW.executive.1332.1551': Traceback (most recent call last):\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 382, in __call__\n result = fn(*args, **kwargs)\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 175, in update_document_details\n token = get_machine_user_token()\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 49, in get_machine_user_token\n get_token_response = requests.post(f\"{api_host}/api/tokens\", data=login_data)\n File \"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 115, in post\n return request(\"post\", url, data=data, json=json, **kwargs)\n File \"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 59, in request\n return session.request(method=method, url=url, **kwargs)\n File 
\"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 573, in request\n prep = self.prepare_request(req)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 484, in prepare_request\n p.prepare(\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 368, in prepare\n self.prepare_url(url, params)\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 439, in prepare_url\n raise MissingSchema(\nrequests.exceptions.MissingSchema: Invalid URL '/api/tokens': No scheme supplied. Perhaps you meant https:///api/tokens?\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/new_document_actions.py\", line 142, in _handle_document\n update_document_details(\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 289, in wrapped_f\n return self(f, *args, **kw)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 379, in __call__\n do = self.iter(retry_state=retry_state)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 326, in iter\n raise retry_exc from fut.exception()\ntenacity.RetryError: RetryError[]\n", + "ERROR ingesting 'TESTCCLW.executive.1332.1555': Traceback (most recent call last):\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 382, in __call__\n result = fn(*args, **kwargs)\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 175, in update_document_details\n token = get_machine_user_token()\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 49, in get_machine_user_token\n get_token_response = requests.post(f\"{api_host}/api/tokens\", data=login_data)\n File \"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 115, in post\n return request(\"post\", url, data=data, json=json, **kwargs)\n File 
\"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 59, in request\n return session.request(method=method, url=url, **kwargs)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 573, in request\n prep = self.prepare_request(req)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 484, in prepare_request\n p.prepare(\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 368, in prepare\n self.prepare_url(url, params)\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 439, in prepare_url\n raise MissingSchema(\nrequests.exceptions.MissingSchema: Invalid URL '/api/tokens': No scheme supplied. Perhaps you meant https:///api/tokens?\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/new_document_actions.py\", line 142, in _handle_document\n update_document_details(\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 289, in wrapped_f\n return self(f, *args, **kw)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 379, in __call__\n do = self.iter(retry_state=retry_state)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 326, in iter\n raise retry_exc from fut.exception()\ntenacity.RetryError: RetryError[]\n", + "ERROR ingesting 'TESTCCLW.executive.1332.1554': Traceback (most recent call last):\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 382, in __call__\n result = fn(*args, **kwargs)\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 175, in update_document_details\n token = get_machine_user_token()\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 49, in get_machine_user_token\n get_token_response = requests.post(f\"{api_host}/api/tokens\", data=login_data)\n File 
\"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 115, in post\n return request(\"post\", url, data=data, json=json, **kwargs)\n File \"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 59, in request\n return session.request(method=method, url=url, **kwargs)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 573, in request\n prep = self.prepare_request(req)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 484, in prepare_request\n p.prepare(\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 368, in prepare\n self.prepare_url(url, params)\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 439, in prepare_url\n raise MissingSchema(\nrequests.exceptions.MissingSchema: Invalid URL '/api/tokens': No scheme supplied. Perhaps you meant https:///api/tokens?\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/new_document_actions.py\", line 142, in _handle_document\n update_document_details(\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 289, in wrapped_f\n return self(f, *args, **kw)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 379, in __call__\n do = self.iter(retry_state=retry_state)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 326, in iter\n raise retry_exc from fut.exception()\ntenacity.RetryError: RetryError[]\n", + "ERROR ingesting 'TESTCCLW.executive.1332.1552': Traceback (most recent call last):\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 382, in __call__\n result = fn(*args, **kwargs)\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 175, in update_document_details\n token = get_machine_user_token()\n File 
\"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 49, in get_machine_user_token\n get_token_response = requests.post(f\"{api_host}/api/tokens\", data=login_data)\n File \"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 115, in post\n return request(\"post\", url, data=data, json=json, **kwargs)\n File \"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 59, in request\n return session.request(method=method, url=url, **kwargs)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 573, in request\n prep = self.prepare_request(req)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 484, in prepare_request\n p.prepare(\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 368, in prepare\n self.prepare_url(url, params)\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 439, in prepare_url\n raise MissingSchema(\nrequests.exceptions.MissingSchema: Invalid URL '/api/tokens': No scheme supplied. 
Perhaps you meant https:///api/tokens?\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/new_document_actions.py\", line 142, in _handle_document\n update_document_details(\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 289, in wrapped_f\n return self(f, *args, **kw)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 379, in __call__\n do = self.iter(retry_state=retry_state)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 326, in iter\n raise retry_exc from fut.exception()\ntenacity.RetryError: RetryError[]\n", + "ERROR ingesting 'TESTCCLW.executive.1332.1556': Traceback (most recent call last):\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 382, in __call__\n result = fn(*args, **kwargs)\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 175, in update_document_details\n token = get_machine_user_token()\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 49, in get_machine_user_token\n get_token_response = requests.post(f\"{api_host}/api/tokens\", data=login_data)\n File \"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 115, in post\n return request(\"post\", url, data=data, json=json, **kwargs)\n File \"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 59, in request\n return session.request(method=method, url=url, **kwargs)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 573, in request\n prep = self.prepare_request(req)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 484, in prepare_request\n p.prepare(\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 368, in prepare\n self.prepare_url(url, params)\n File 
\"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 439, in prepare_url\n raise MissingSchema(\nrequests.exceptions.MissingSchema: Invalid URL '/api/tokens': No scheme supplied. Perhaps you meant https:///api/tokens?\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/new_document_actions.py\", line 142, in _handle_document\n update_document_details(\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 289, in wrapped_f\n return self(f, *args, **kw)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 379, in __call__\n do = self.iter(retry_state=retry_state)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 326, in iter\n raise retry_exc from fut.exception()\ntenacity.RetryError: RetryError[]\n", + "ERROR ingesting 'TESTCCLW.executive.1332.1560': Traceback (most recent call last):\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 382, in __call__\n result = fn(*args, **kwargs)\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 175, in update_document_details\n token = get_machine_user_token()\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 49, in get_machine_user_token\n get_token_response = requests.post(f\"{api_host}/api/tokens\", data=login_data)\n File \"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 115, in post\n return request(\"post\", url, data=data, json=json, **kwargs)\n File \"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 59, in request\n return session.request(method=method, url=url, **kwargs)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 573, in request\n prep = self.prepare_request(req)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 484, in 
prepare_request\n p.prepare(\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 368, in prepare\n self.prepare_url(url, params)\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 439, in prepare_url\n raise MissingSchema(\nrequests.exceptions.MissingSchema: Invalid URL '/api/tokens': No scheme supplied. Perhaps you meant https:///api/tokens?\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/new_document_actions.py\", line 142, in _handle_document\n update_document_details(\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 289, in wrapped_f\n return self(f, *args, **kw)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 379, in __call__\n do = self.iter(retry_state=retry_state)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 326, in iter\n raise retry_exc from fut.exception()\ntenacity.RetryError: RetryError[]\n", + "ERROR ingesting 'TESTCCLW.executive.1332.1559': Traceback (most recent call last):\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 382, in __call__\n result = fn(*args, **kwargs)\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 175, in update_document_details\n token = get_machine_user_token()\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 49, in get_machine_user_token\n get_token_response = requests.post(f\"{api_host}/api/tokens\", data=login_data)\n File \"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 115, in post\n return request(\"post\", url, data=data, json=json, **kwargs)\n File \"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 59, in request\n return session.request(method=method, url=url, **kwargs)\n File 
\"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 573, in request\n prep = self.prepare_request(req)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 484, in prepare_request\n p.prepare(\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 368, in prepare\n self.prepare_url(url, params)\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 439, in prepare_url\n raise MissingSchema(\nrequests.exceptions.MissingSchema: Invalid URL '/api/tokens': No scheme supplied. Perhaps you meant https:///api/tokens?\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/new_document_actions.py\", line 142, in _handle_document\n update_document_details(\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 289, in wrapped_f\n return self(f, *args, **kw)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 379, in __call__\n do = self.iter(retry_state=retry_state)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 326, in iter\n raise retry_exc from fut.exception()\ntenacity.RetryError: RetryError[]\n", + "ERROR ingesting 'TESTCCLW.executive.1332.1557': Traceback (most recent call last):\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 382, in __call__\n result = fn(*args, **kwargs)\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 175, in update_document_details\n token = get_machine_user_token()\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 49, in get_machine_user_token\n get_token_response = requests.post(f\"{api_host}/api/tokens\", data=login_data)\n File \"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 115, in post\n return request(\"post\", url, data=data, json=json, **kwargs)\n File 
\"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 59, in request\n return session.request(method=method, url=url, **kwargs)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 573, in request\n prep = self.prepare_request(req)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 484, in prepare_request\n p.prepare(\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 368, in prepare\n self.prepare_url(url, params)\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 439, in prepare_url\n raise MissingSchema(\nrequests.exceptions.MissingSchema: Invalid URL '/api/tokens': No scheme supplied. Perhaps you meant https:///api/tokens?\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/new_document_actions.py\", line 142, in _handle_document\n update_document_details(\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 289, in wrapped_f\n return self(f, *args, **kw)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 379, in __call__\n do = self.iter(retry_state=retry_state)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 326, in iter\n raise retry_exc from fut.exception()\ntenacity.RetryError: RetryError[]\n", + "ERROR ingesting 'TESTCCLW.executive.1332.1558': Traceback (most recent call last):\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 382, in __call__\n result = fn(*args, **kwargs)\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 175, in update_document_details\n token = get_machine_user_token()\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 49, in get_machine_user_token\n get_token_response = requests.post(f\"{api_host}/api/tokens\", data=login_data)\n File 
\"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 115, in post\n return request(\"post\", url, data=data, json=json, **kwargs)\n File \"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 59, in request\n return session.request(method=method, url=url, **kwargs)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 573, in request\n prep = self.prepare_request(req)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 484, in prepare_request\n p.prepare(\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 368, in prepare\n self.prepare_url(url, params)\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 439, in prepare_url\n raise MissingSchema(\nrequests.exceptions.MissingSchema: Invalid URL '/api/tokens': No scheme supplied. Perhaps you meant https:///api/tokens?\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/new_document_actions.py\", line 142, in _handle_document\n update_document_details(\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 289, in wrapped_f\n return self(f, *args, **kw)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 379, in __call__\n do = self.iter(retry_state=retry_state)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 326, in iter\n raise retry_exc from fut.exception()\ntenacity.RetryError: RetryError[]\n", + "ERROR ingesting 'TESTCCLW.executive.1332.1563': Traceback (most recent call last):\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 382, in __call__\n result = fn(*args, **kwargs)\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 175, in update_document_details\n token = get_machine_user_token()\n File 
\"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 49, in get_machine_user_token\n get_token_response = requests.post(f\"{api_host}/api/tokens\", data=login_data)\n File \"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 115, in post\n return request(\"post\", url, data=data, json=json, **kwargs)\n File \"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 59, in request\n return session.request(method=method, url=url, **kwargs)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 573, in request\n prep = self.prepare_request(req)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 484, in prepare_request\n p.prepare(\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 368, in prepare\n self.prepare_url(url, params)\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 439, in prepare_url\n raise MissingSchema(\nrequests.exceptions.MissingSchema: Invalid URL '/api/tokens': No scheme supplied. 
Perhaps you meant https:///api/tokens?\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/new_document_actions.py\", line 142, in _handle_document\n update_document_details(\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 289, in wrapped_f\n return self(f, *args, **kw)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 379, in __call__\n do = self.iter(retry_state=retry_state)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 326, in iter\n raise retry_exc from fut.exception()\ntenacity.RetryError: RetryError[]\n", + "ERROR ingesting 'TESTCCLW.executive.1332.1562': Traceback (most recent call last):\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 382, in __call__\n result = fn(*args, **kwargs)\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 175, in update_document_details\n token = get_machine_user_token()\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 49, in get_machine_user_token\n get_token_response = requests.post(f\"{api_host}/api/tokens\", data=login_data)\n File \"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 115, in post\n return request(\"post\", url, data=data, json=json, **kwargs)\n File \"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 59, in request\n return session.request(method=method, url=url, **kwargs)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 573, in request\n prep = self.prepare_request(req)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 484, in prepare_request\n p.prepare(\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 368, in prepare\n self.prepare_url(url, params)\n File 
\"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 439, in prepare_url\n raise MissingSchema(\nrequests.exceptions.MissingSchema: Invalid URL '/api/tokens': No scheme supplied. Perhaps you meant https:///api/tokens?\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/new_document_actions.py\", line 142, in _handle_document\n update_document_details(\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 289, in wrapped_f\n return self(f, *args, **kw)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 379, in __call__\n do = self.iter(retry_state=retry_state)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 326, in iter\n raise retry_exc from fut.exception()\ntenacity.RetryError: RetryError[]\n", + "ERROR ingesting 'TESTCCLW.executive.3.3': Traceback (most recent call last):\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 382, in __call__\n result = fn(*args, **kwargs)\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 175, in update_document_details\n token = get_machine_user_token()\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 49, in get_machine_user_token\n get_token_response = requests.post(f\"{api_host}/api/tokens\", data=login_data)\n File \"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 115, in post\n return request(\"post\", url, data=data, json=json, **kwargs)\n File \"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 59, in request\n return session.request(method=method, url=url, **kwargs)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 573, in request\n prep = self.prepare_request(req)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 484, in prepare_request\n 
p.prepare(\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 368, in prepare\n self.prepare_url(url, params)\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 439, in prepare_url\n raise MissingSchema(\nrequests.exceptions.MissingSchema: Invalid URL '/api/tokens': No scheme supplied. Perhaps you meant https:///api/tokens?\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/new_document_actions.py\", line 142, in _handle_document\n update_document_details(\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 289, in wrapped_f\n return self(f, *args, **kw)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 379, in __call__\n do = self.iter(retry_state=retry_state)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 326, in iter\n raise retry_exc from fut.exception()\ntenacity.RetryError: RetryError[]\n", + "ERROR ingesting 'TESTCCLW.executive.4.4': Traceback (most recent call last):\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 382, in __call__\n result = fn(*args, **kwargs)\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 175, in update_document_details\n token = get_machine_user_token()\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/api_client.py\", line 49, in get_machine_user_token\n get_token_response = requests.post(f\"{api_host}/api/tokens\", data=login_data)\n File \"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 115, in post\n return request(\"post\", url, data=data, json=json, **kwargs)\n File \"/usr/local/lib/python3.9/site-packages/requests/api.py\", line 59, in request\n return session.request(method=method, url=url, **kwargs)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 573, in 
request\n prep = self.prepare_request(req)\n File \"/usr/local/lib/python3.9/site-packages/requests/sessions.py\", line 484, in prepare_request\n p.prepare(\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 368, in prepare\n self.prepare_url(url, params)\n File \"/usr/local/lib/python3.9/site-packages/requests/models.py\", line 439, in prepare_url\n raise MissingSchema(\nrequests.exceptions.MissingSchema: Invalid URL '/api/tokens': No scheme supplied. Perhaps you meant https:///api/tokens?\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File \"/navigator-data-ingest/src/navigator_data_ingest/base/new_document_actions.py\", line 142, in _handle_document\n update_document_details(\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 289, in wrapped_f\n return self(f, *args, **kw)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 379, in __call__\n do = self.iter(retry_state=retry_state)\n File \"/usr/local/lib/python3.9/site-packages/tenacity/__init__.py\", line 326, in iter\n raise retry_exc from fut.exception()\ntenacity.RetryError: RetryError[]\n" ] \ No newline at end of file diff --git a/src/navigator_data_ingest/base/updated_document_actions.py b/src/navigator_data_ingest/base/updated_document_actions.py index 8fc86dec..496a8675 100644 --- a/src/navigator_data_ingest/base/updated_document_actions.py +++ b/src/navigator_data_ingest/base/updated_document_actions.py @@ -203,7 +203,7 @@ def parse( """ document_id, document_update = update _LOGGER.info( - "Updating document so as to parse during the next run.", + "Archiving document so as to re-download from source and parse during the next run.", extra={ "props": { "document_id": document_id, @@ -211,39 +211,10 @@ def parse( }, ) errors = [] - for prefix_path in [ - S3Path( - os.path.join( - "s3://", update_config.pipeline_bucket, update_config.parser_input - ) - ), - S3Path( - 
os.path.join( - "s3://", update_config.pipeline_bucket, update_config.embeddings_input - ) - ), - S3Path( - os.path.join( - "s3://", update_config.pipeline_bucket, update_config.indexer_input - ) - ), - ]: - # Might be translated and non-translated json objects - document_files = get_document_files( - prefix_path, document_id, suffix_filter="json" - ) - for document_file in document_files: - errors.append( - update_file_field( - document_path=document_file, - field=str(document_update.type.value), - new_value=document_update.csv_value, - existing_value=document_update.db_value, - ) - ) timestamp = datetime.now().strftime("%Y-%m-%d-%H-%M-%S") for prefix in [ + update_config.parser_input, update_config.embeddings_input, update_config.indexer_input, ]: diff --git a/src/navigator_data_ingest/tests/test_update_actions.py b/src/navigator_data_ingest/tests/test_update_actions.py index b9063e5a..1068d3d3 100644 --- a/src/navigator_data_ingest/tests/test_update_actions.py +++ b/src/navigator_data_ingest/tests/test_update_actions.py @@ -184,16 +184,8 @@ def test_parse( for s3_key in s3_document_keys ] - assert parser_input_doc.exists() + assert not parser_input_doc.exists() assert not embeddings_input_doc.exists() assert not embeddings_input_translated_doc.exists() assert not indexer_input_doc_json.exists() assert not indexer_input_doc_npy.exists() - - parser_input_doc_data = json.loads(parser_input_doc.read_text()) - assert ( - parser_input_doc_data[ - PipelineFieldMapping[UpdateTypes(update_to_source_url.type)] - ] - == update_to_source_url.csv_value - )