diff --git a/CHANGELOG.md b/CHANGELOG.md index 133fc6726a..eb3b89a176 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,15 @@ -## 0.14.1-dev0 +## 0.14.1-dev1 * **Add support for Python 3.12**. `unstructured` now works with Python 3.12! ### Features +* **Large improvements to the ingest process:** + * Support for multiprocessing and async, with limits for both. + * Streamlined the process of mapping CLI invocations to the underlying code + * More granular steps introduced to give better control over process (e.g. dedicated step to uncompress files already in the local filesystem, new optional staging step before upload) + * Use the python client when calling the unstructured api for partitioning or chunking + * Saving the final content is now a dedicated destination connector (local) set as the default if none are provided. Avoids adding new files locally if uploading elsewhere. + * Leverage last modified date when deciding if new files should be downloaded and reprocessed. ### Fixes diff --git a/examples/ingest/chroma/ingest.sh b/examples/ingest/chroma/ingest.sh index 4f535d25e0..c0cd945c4f 100644 --- a/examples/ingest/chroma/ingest.sh +++ b/examples/ingest/chroma/ingest.sh @@ -16,7 +16,7 @@ PYTHONPATH=. ./unstructured/ingest/main.py \ --input-path example-docs/book-war-and-peace-1p.txt \ --output-dir local-to-chroma \ --strategy fast \ - --chunk-elements \ + --chunking-strategy by_title \ --embedding-provider "" \ --num-processes 2 \ --verbose \ diff --git a/examples/ingest/clarifai/ingest.sh b/examples/ingest/clarifai/ingest.sh index 9658d859a8..ecd6364092 100644 --- a/examples/ingest/clarifai/ingest.sh +++ b/examples/ingest/clarifai/ingest.sh @@ -10,7 +10,7 @@ PYTHONPATH=. 
./unstructured/ingest/main.py \ --input-path example-docs/book-war-and-peace-1225p.txt \ --output-dir local-output-to-clarifai \ --strategy fast \ - --chunk-elements \ + --chunking-strategy by_title \ --num-processes 2 \ --verbose \ clarifai \ diff --git a/examples/ingest/elasticsearch/destination.sh b/examples/ingest/elasticsearch/destination.sh index 9986a3d672..9802ef7517 100755 --- a/examples/ingest/elasticsearch/destination.sh +++ b/examples/ingest/elasticsearch/destination.sh @@ -15,7 +15,7 @@ PYTHONPATH=. ./unstructured/ingest/main.py \ --input-path example-docs/book-war-and-peace-1225p.txt \ --output-dir local-to-elasticsearch \ --strategy fast \ - --chunk-elements \ + --chunking-strategy by_title \ --embedding-provider "" \ --num-processes 2 \ --verbose \ diff --git a/examples/ingest/mongodb/destination.sh b/examples/ingest/mongodb/destination.sh index e27334c767..0e3089ea5d 100755 --- a/examples/ingest/mongodb/destination.sh +++ b/examples/ingest/mongodb/destination.sh @@ -15,7 +15,7 @@ PYTHONPATH=. ./unstructured/ingest/main.py \ --input-path example-docs/book-war-and-peace-1225p.txt \ --output-dir local-to-mongodb \ --strategy fast \ - --chunk-elements \ + --chunking-strategy by_title \ --embedding-provider "" \ --num-processes 2 \ --verbose \ diff --git a/examples/ingest/opensearch/destination.sh b/examples/ingest/opensearch/destination.sh index 0642ab2250..c17d6dae50 100755 --- a/examples/ingest/opensearch/destination.sh +++ b/examples/ingest/opensearch/destination.sh @@ -15,7 +15,7 @@ PYTHONPATH=. 
./unstructured/ingest/main.py \ --input-path example-docs/book-war-and-peace-1225p.txt \ --output-dir local-to-opensearch \ --strategy fast \ - --chunk-elements \ + --chunking-strategy by_title \ --embedding-provider "" \ --num-processes 2 \ --verbose \ diff --git a/examples/ingest/pinecone/ingest.sh b/examples/ingest/pinecone/ingest.sh index d18bd1c1e0..6d1ba6dac2 100755 --- a/examples/ingest/pinecone/ingest.sh +++ b/examples/ingest/pinecone/ingest.sh @@ -16,7 +16,7 @@ PYTHONPATH=. ./unstructured/ingest/main.py \ --input-path example-docs/book-war-and-peace-1225p.txt \ --output-dir local-to-pinecone \ --strategy fast \ - --chunk-elements \ + --chunking-strategy by_title \ --embedding-provider "" \ --num-processes 2 \ --verbose \ diff --git a/examples/ingest/qdrant/ingest.sh b/examples/ingest/qdrant/ingest.sh index b2595a943d..f6b3d3ba31 100644 --- a/examples/ingest/qdrant/ingest.sh +++ b/examples/ingest/qdrant/ingest.sh @@ -12,7 +12,7 @@ unstructured-ingest \ --input-path example-docs/book-war-and-peace-1225p.txt \ --output-dir local-output-to-qdrant \ --strategy fast \ - --chunk-elements \ + --chunking-strategy by_title \ --embedding-provider "$EMBEDDING_PROVIDER" \ --num-processes 2 \ --verbose \ diff --git a/examples/ingest/sql/ingest.sh b/examples/ingest/sql/ingest.sh index f5cfa3fa74..9ac064cfc9 100755 --- a/examples/ingest/sql/ingest.sh +++ b/examples/ingest/sql/ingest.sh @@ -12,7 +12,7 @@ PYTHONPATH=. ./unstructured/ingest/main.py \ --input-path example-docs/book-war-and-peace-1225p.txt \ --output-dir local-to-pinecone \ --strategy fast \ - --chunk-elements \ + --chunking-strategy by_title \ --embedding-provider "" \ --num-processes 2 \ --verbose \ diff --git a/examples/ingest/weaviate/ingest.sh b/examples/ingest/weaviate/ingest.sh index 773c6f91cd..0c77886a81 100644 --- a/examples/ingest/weaviate/ingest.sh +++ b/examples/ingest/weaviate/ingest.sh @@ -16,7 +16,7 @@ PYTHONPATH=. 
./unstructured/ingest/main.py \ --reprocess \ --input-path example-docs/book-war-and-peace-1225p.txt \ --work-dir weaviate-work-dir \ - --chunk-elements \ + --chunking-strategy by_title \ --chunk-new-after-n-chars 2500 --chunk-multipage-sections \ --embedding-provider "langchain-huggingface" \ weaviate \ diff --git a/scripts/elasticsearch-test-helpers/destination_connector/elasticsearch_elements_mappings.json b/scripts/elasticsearch-test-helpers/destination_connector/elasticsearch_elements_mappings.json new file mode 100644 index 0000000000..8cf038c78b --- /dev/null +++ b/scripts/elasticsearch-test-helpers/destination_connector/elasticsearch_elements_mappings.json @@ -0,0 +1,145 @@ +{ + "properties": { + "element_id": { + "type": "keyword" + }, + "text": { + "type": "text", + "analyzer": "english" + }, + "type": { + "type": "text" + }, + "embeddings": { + "type": "dense_vector", + "dims": 384 + }, + "metadata": { + "type": "object", + "properties": { + "category_depth": { + "type": "integer" + }, + "parent_id": { + "type": "keyword" + }, + "attached_to_filename": { + "type": "keyword" + }, + "filetype": { + "type": "keyword" + }, + "last_modified": { + "type": "date" + }, + "file_directory": { + "type": "keyword" + }, + "filename": { + "type": "keyword" + }, + "data_source": { + "type": "object", + "properties": { + "url": { + "type": "text", + "analyzer": "standard" + }, + "version": { + "type": "keyword" + }, + "date_created": { + "type": "date" + }, + "date_modified": { + "type": "date" + }, + "date_processed": { + "type": "date" + }, + "record_locator": { + "type": "keyword" + }, + "permissions_data": { + "type": "object" + } + } + }, + "coordinates": { + "type": "object", + "properties": { + "system": { + "type": "keyword" + }, + "layout_width": { + "type": "float" + }, + "layout_height": { + "type": "float" + }, + "points": { + "type": "float" + } + } + }, + "languages": { + "type": "keyword" + }, + "page_number": { + "type": "integer" + }, + "page_name": { + 
"type": "keyword" + }, + "url": { + "type": "text", + "analyzer": "standard" + }, + "links": { + "type": "object" + }, + "link_urls": { + "type": "text" + }, + "link_texts": { + "type": "text" + }, + "sent_from": { + "type": "text", + "analyzer": "standard" + }, + "sent_to": { + "type": "text", + "analyzer": "standard" + }, + "subject": { + "type": "text", + "analyzer": "standard" + }, + "section": { + "type": "text", + "analyzer": "standard" + }, + "header_footer_type": { + "type": "keyword" + }, + "emphasized_text_contents": { + "type": "text" + }, + "emphasized_text_tags": { + "type": "keyword" + }, + "text_as_html": { + "type": "text", + "analyzer": "standard" + }, + "regex_metadata": { + "type": "object" + }, + "detection_class_prob": { + "type": "float" + } + } + } + } +} \ No newline at end of file diff --git a/scripts/elasticsearch-test-helpers/destination_connector/es_cluster_config.py b/scripts/elasticsearch-test-helpers/destination_connector/es_cluster_config.py index 5553994f9d..bada66e3bb 100644 --- a/scripts/elasticsearch-test-helpers/destination_connector/es_cluster_config.py +++ b/scripts/elasticsearch-test-helpers/destination_connector/es_cluster_config.py @@ -5,7 +5,9 @@ INDEX_NAME = "ingest-test-destination" USER = os.environ["ELASTIC_USER"] PASSWORD = os.environ["ELASTIC_PASSWORD"] -MAPPING_PATH = "docs/source/ingest/destination_connectors/data/elasticsearch_elements_mappings.json" +MAPPING_PATH = ( + "scripts/elasticsearch-test-helpers/destination_connector/elasticsearch_elements_mappings.json" +) with open(MAPPING_PATH) as f: mappings = json.load(f) diff --git a/scripts/opensearch-test-helpers/destination_connector/opensearch_cluster_config.py b/scripts/opensearch-test-helpers/destination_connector/opensearch_cluster_config.py index b16fc2fff3..ae3b2fe780 100644 --- a/scripts/opensearch-test-helpers/destination_connector/opensearch_cluster_config.py +++ b/scripts/opensearch-test-helpers/destination_connector/opensearch_cluster_config.py @@ 
-4,7 +4,9 @@ INDEX_NAME = "ingest-test-destination" USER = "admin" PASSWORD = "admin" -MAPPING_PATH = "docs/source/ingest/destination_connectors/data/opensearch_elements_mappings.json" +MAPPING_PATH = ( + "scripts/opensearch-test-helpers/destination_connector/opensearch_elements_mappings.json" +) with open(MAPPING_PATH) as f: mappings = json.load(f) diff --git a/scripts/opensearch-test-helpers/destination_connector/opensearch_elements_mappings.json b/scripts/opensearch-test-helpers/destination_connector/opensearch_elements_mappings.json new file mode 100644 index 0000000000..cfdcaa12c5 --- /dev/null +++ b/scripts/opensearch-test-helpers/destination_connector/opensearch_elements_mappings.json @@ -0,0 +1,152 @@ +{"settings": { + "index": { + "knn": true, + "knn.algo_param.ef_search": 100 + } + }, + "mappings": { + "properties": { + "element_id": { + "type": "keyword" + }, + "text": { + "type": "text", + "analyzer": "english" + }, + "type": { + "type": "text" + }, + "embeddings": { + "type": "knn_vector", + "dimension": 384 + }, + "metadata": { + "type": "object", + "properties": { + "category_depth": { + "type": "integer" + }, + "parent_id": { + "type": "keyword" + }, + "attached_to_filename": { + "type": "keyword" + }, + "filetype": { + "type": "keyword" + }, + "last_modified": { + "type": "date" + }, + "file_directory": { + "type": "keyword" + }, + "filename": { + "type": "keyword" + }, + "data_source": { + "type": "object", + "properties": { + "url": { + "type": "text", + "analyzer": "standard" + }, + "version": { + "type": "keyword" + }, + "date_created": { + "type": "date" + }, + "date_modified": { + "type": "date" + }, + "date_processed": { + "type": "date" + }, + "record_locator": { + "type": "keyword" + }, + "permissions_data": { + "type": "object" + } + } + }, + "coordinates": { + "type": "object", + "properties": { + "system": { + "type": "keyword" + }, + "layout_width": { + "type": "float" + }, + "layout_height": { + "type": "float" + }, + "points": { + 
"type": "float" + } + } + }, + "languages": { + "type": "keyword" + }, + "page_number": { + "type": "integer" + }, + "page_name": { + "type": "keyword" + }, + "url": { + "type": "text", + "analyzer": "standard" + }, + "links": { + "type": "object" + }, + "link_urls": { + "type": "text" + }, + "link_texts": { + "type": "text" + }, + "sent_from": { + "type": "text", + "analyzer": "standard" + }, + "sent_to": { + "type": "text", + "analyzer": "standard" + }, + "subject": { + "type": "text", + "analyzer": "standard" + }, + "section": { + "type": "text", + "analyzer": "standard" + }, + "header_footer_type": { + "type": "keyword" + }, + "emphasized_text_contents": { + "type": "text" + }, + "emphasized_text_tags": { + "type": "keyword" + }, + "text_as_html": { + "type": "text", + "analyzer": "standard" + }, + "regex_metadata": { + "type": "object" + }, + "detection_class_prob": { + "type": "float" + } + } + } + } + } +} diff --git a/test_unstructured_ingest/dest/astra.sh b/test_unstructured_ingest/dest/astra.sh index 36129c3a9f..8f09e288a9 100755 --- a/test_unstructured_ingest/dest/astra.sh +++ b/test_unstructured_ingest/dest/astra.sh @@ -47,7 +47,7 @@ PYTHONPATH=. ./unstructured/ingest/main.py \ --verbose \ --input-path example-docs/book-war-and-peace-1p.txt \ --work-dir "$WORK_DIR" \ - --chunk-elements \ + --chunking-strategy by_title \ --chunk-max-characters 1500 \ --chunk-multipage-sections \ --embedding-provider "langchain-huggingface" \ diff --git a/test_unstructured_ingest/dest/chroma.sh b/test_unstructured_ingest/dest/chroma.sh index ba68a267cd..d75a246fa6 100755 --- a/test_unstructured_ingest/dest/chroma.sh +++ b/test_unstructured_ingest/dest/chroma.sh @@ -45,7 +45,7 @@ PYTHONPATH=. 
./unstructured/ingest/main.py \ --verbose \ --input-path example-docs/book-war-and-peace-1p.txt \ --work-dir "$WORK_DIR" \ - --chunk-elements \ + --chunking-strategy by_title \ --chunk-max-characters 1500 \ --chunk-multipage-sections \ --embedding-provider "langchain-huggingface" \ diff --git a/test_unstructured_ingest/dest/clarifai.sh b/test_unstructured_ingest/dest/clarifai.sh index 41f9565781..2ed046aaec 100755 --- a/test_unstructured_ingest/dest/clarifai.sh +++ b/test_unstructured_ingest/dest/clarifai.sh @@ -70,7 +70,7 @@ PYTHONPATH=. ./unstructured/ingest/main.py \ --input-path example-docs/book-war-and-peace-1p.txt \ --output-dir "$OUTPUT_DIR" \ --strategy fast \ - --chunk-elements \ + --chunking-strategy by_title \ --num-processes "$max_processes" \ --work-dir "$WORK_DIR" \ --verbose \ diff --git a/test_unstructured_ingest/dest/elasticsearch.sh b/test_unstructured_ingest/dest/elasticsearch.sh index 391a630028..96cf678049 100755 --- a/test_unstructured_ingest/dest/elasticsearch.sh +++ b/test_unstructured_ingest/dest/elasticsearch.sh @@ -45,7 +45,7 @@ PYTHONPATH=. ./unstructured/ingest/main.py \ --reprocess \ --input-path example-docs/book-war-and-peace-1225p.txt \ --work-dir "$WORK_DIR" \ - --chunk-elements \ + --chunking-strategy by_title \ --chunk-combine-text-under-n-chars 200 \ --chunk-new-after-n-chars 2500 \ --chunk-max-characters 38000 \ diff --git a/test_unstructured_ingest/dest/pinecone.sh b/test_unstructured_ingest/dest/pinecone.sh index d790b3f4f2..e9badb7375 100755 --- a/test_unstructured_ingest/dest/pinecone.sh +++ b/test_unstructured_ingest/dest/pinecone.sh @@ -91,7 +91,7 @@ PYTHONPATH=. 
./unstructured/ingest/main.py \ --reprocess \ --input-path example-docs/book-war-and-peace-1225p.txt \ --work-dir "$WORK_DIR" \ - --chunk-elements \ + --chunking-strategy by_title \ --chunk-combine-text-under-n-chars 200 --chunk-new-after-n-chars 2500 --chunk-max-characters 38000 --chunk-multipage-sections \ --embedding-provider "langchain-huggingface" \ pinecone \ diff --git a/test_unstructured_ingest/dest/qdrant.sh b/test_unstructured_ingest/dest/qdrant.sh index 2e884d37ea..47f797165a 100755 --- a/test_unstructured_ingest/dest/qdrant.sh +++ b/test_unstructured_ingest/dest/qdrant.sh @@ -60,7 +60,7 @@ PYTHONPATH=. ./unstructured/ingest/main.py \ --reprocess \ --input-path example-docs/book-war-and-peace-1225p.txt \ --work-dir "$WORK_DIR" \ - --chunk-elements \ + --chunking-strategy by_title \ --chunk-combine-text-under-n-chars 200 --chunk-new-after-n-chars 2500 --chunk-max-characters 38000 --chunk-multipage-sections \ --embedding-provider "langchain-huggingface" \ qdrant \ diff --git a/test_unstructured_ingest/dest/s3.sh b/test_unstructured_ingest/dest/s3.sh index e6571b0ff9..99b95fbee8 100755 --- a/test_unstructured_ingest/dest/s3.sh +++ b/test_unstructured_ingest/dest/s3.sh @@ -7,7 +7,6 @@ SCRIPT_DIR=$(dirname "$DEST_PATH") cd "$SCRIPT_DIR"/.. 
|| exit 1 OUTPUT_FOLDER_NAME=s3-dest OUTPUT_ROOT=${OUTPUT_ROOT:-$SCRIPT_DIR} -OUTPUT_DIR=$OUTPUT_ROOT/structured-output/$OUTPUT_FOLDER_NAME WORK_DIR=$OUTPUT_ROOT/workdir/$OUTPUT_FOLDER_NAME max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} DESTINATION_S3="s3://utic-dev-tech-fixtures/utic-ingest-test-fixtures-output/$(uuidgen)/" @@ -16,7 +15,6 @@ CI=${CI:-"false"} # shellcheck disable=SC1091 source "$SCRIPT_DIR"/cleanup.sh function cleanup() { - cleanup_dir "$OUTPUT_DIR" cleanup_dir "$WORK_DIR" if aws s3 ls "$DESTINATION_S3" --region us-east-2; then @@ -31,7 +29,6 @@ RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ local \ --num-processes "$max_processes" \ - --output-dir "$OUTPUT_DIR" \ --strategy fast \ --verbose \ --reprocess \ diff --git a/test_unstructured_ingest/dest/sharepoint-embed-cog-index.sh b/test_unstructured_ingest/dest/sharepoint-embed-cog-index.sh index 9d86dadae0..5c222a4599 100755 --- a/test_unstructured_ingest/dest/sharepoint-embed-cog-index.sh +++ b/test_unstructured_ingest/dest/sharepoint-embed-cog-index.sh @@ -98,7 +98,7 @@ PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ --path "Shared Documents" \ --recursive \ --embedding-provider "langchain-huggingface" \ - --chunk-elements \ + --chunking-strategy by_title \ --chunk-multipage-sections \ --work-dir "$WORK_DIR" \ azure-cognitive-search \ diff --git a/test_unstructured_ingest/expected-structured-output/embed-bedrock/book-war-and-peace-1p.txt.json b/test_unstructured_ingest/expected-structured-output/embed-bedrock/book-war-and-peace-1p.json similarity index 99% rename from test_unstructured_ingest/expected-structured-output/embed-bedrock/book-war-and-peace-1p.txt.json rename to test_unstructured_ingest/expected-structured-output/embed-bedrock/book-war-and-peace-1p.json index ac33a4d496..0a92688667 100644 --- a/test_unstructured_ingest/expected-structured-output/embed-bedrock/book-war-and-peace-1p.txt.json +++ 
b/test_unstructured_ingest/expected-structured-output/embed-bedrock/book-war-and-peace-1p.json @@ -4,18 +4,20 @@ "element_id": "ecc6ecfdda0975d91546edf1cd407e86", "text": "CHAPTER I", "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", "data_source": { - "url": "example-docs/book-war-and-peace-1p.txt", + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/book-war-and-peace-1p.txt" + }, "permissions_data": [ { "mode": 33188 } ] - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] + } }, "embeddings": [ -0.66796875, @@ -1561,18 +1563,20 @@ "element_id": "9ab2c6fc3fff6cedc83ffce2ffcc8705", "text": "\"Well, Prince, so Genoa and Lucca are now just family estates of the Buonapartes. But I warn you, if you don't tell me that this means war, if you still try to defend the infamies and horrors perpetrated by that Antichrist--I really believe he is Antichrist--I will have nothing more to do with you and you are no longer my friend, no longer my 'faithful slave,' as you call yourself! But how do you do? I see I have frightened you--sit down and tell me all the news.\"", "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", "data_source": { - "url": "example-docs/book-war-and-peace-1p.txt", + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/book-war-and-peace-1p.txt" + }, "permissions_data": [ { "mode": 33188 } ] - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] + } }, "embeddings": [ -0.89453125, @@ -3118,18 +3122,20 @@ "element_id": "19fd6c4711db1634658ca1b582bbb282", "text": "It was in July, 1805, and the speaker was the well-known Anna Pavlovna Scherer, maid of honor and favorite of the Empress Marya Fedorovna. With these words she greeted Prince Vasili Kuragin, a man of high rank and importance, who was the first to arrive at her reception. Anna Pavlovna had had a cough for some days. 
She was, as she said, suffering from la grippe; grippe being then a new word in St. Petersburg, used only by the elite.", "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", "data_source": { - "url": "example-docs/book-war-and-peace-1p.txt", + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/book-war-and-peace-1p.txt" + }, "permissions_data": [ { "mode": 33188 } ] - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] + } }, "embeddings": [ 0.13964844, @@ -4675,18 +4681,20 @@ "element_id": "9a29e46f349e568cbec4c13d6ac6d3e1", "text": "All her invitations without exception, written in French, and delivered by a scarlet-liveried footman that morning, ran as follows:", "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", "data_source": { - "url": "example-docs/book-war-and-peace-1p.txt", + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/book-war-and-peace-1p.txt" + }, "permissions_data": [ { "mode": 33188 } ] - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] + } }, "embeddings": [ -0.45703125, @@ -6232,18 +6240,20 @@ "element_id": "e37f7a0354c187111dbbd76c353a8548", "text": "\"If you have nothing better to do, Count (or Prince), and if the prospect of spending an evening with a poor invalid is not too terrible, I shall be very charmed to see you tonight between 7 and 10--Annette Scherer.\"", "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", "data_source": { - "url": "example-docs/book-war-and-peace-1p.txt", + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/book-war-and-peace-1p.txt" + }, "permissions_data": [ { "mode": 33188 } ] - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] + } }, "embeddings": [ -0.1484375, @@ -7789,18 +7799,20 @@ "element_id": "ff53e9e3e2c0a40c878f3117dfaee12e", "text": "\"Heavens! 
what a virulent attack!\" replied the prince, not in the least disconcerted by this reception. He had just entered, wearing an embroidered court uniform, knee breeches, and shoes, and had stars on his breast and a serene expression on his flat face. He spoke in that refined French in which our grandfathers not only spoke but thought, and with the gentle, patronizing intonation natural to a man of importance who had grown old in society and at court. He went up to Anna Pavlovna, kissed her hand, presenting to her his bald, scented, and shining head, and complacently seated himself on the sofa.", "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", "data_source": { - "url": "example-docs/book-war-and-peace-1p.txt", + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/book-war-and-peace-1p.txt" + }, "permissions_data": [ { "mode": 33188 } ] - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] + } }, "embeddings": [ -0.22851562, @@ -9346,18 +9358,20 @@ "element_id": "944f331f0e9f276612e232b36f28b5f6", "text": "\"First of all, dear friend, tell me how you are. Set your friend's mind at rest,\" said he without altering his tone, beneath the politeness and affected sympathy of which indifference and even irony could be discerned.", "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", "data_source": { - "url": "example-docs/book-war-and-peace-1p.txt", + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/book-war-and-peace-1p.txt" + }, "permissions_data": [ { "mode": 33188 } ] - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] + } }, "embeddings": [ -0.49023438, @@ -10903,18 +10917,20 @@ "element_id": "9ff1d8f7e93d526d0e3a174a51850ec8", "text": "\"Can one be well while suffering morally? Can one be calm in times like these if one has any feeling?\" said Anna Pavlovna. 
\"You are staying the whole evening, I hope?\"", "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", "data_source": { - "url": "example-docs/book-war-and-peace-1p.txt", + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/book-war-and-peace-1p.txt" + }, "permissions_data": [ { "mode": 33188 } ] - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] + } }, "embeddings": [ 0.5859375, @@ -12460,18 +12476,20 @@ "element_id": "92ccca74969ed207c2b36cecb1b1b677", "text": "\"And the fete at the English ambassador's? Today is Wednesday. I must put in an appearance there,\" said the prince. \"My daughter is coming for me to take me there.\"", "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", "data_source": { - "url": "example-docs/book-war-and-peace-1p.txt", + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/book-war-and-peace-1p.txt" + }, "permissions_data": [ { "mode": 33188 } ] - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] + } }, "embeddings": [ -0.5390625, @@ -14017,18 +14035,20 @@ "element_id": "fc189a009c8fc4507bd42d9601bdb566", "text": "\"I thought today's fete had been canceled. 
I confess all these festivities and fireworks are becoming wearisome.\"", "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", "data_source": { - "url": "example-docs/book-war-and-peace-1p.txt", + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/book-war-and-peace-1p.txt" + }, "permissions_data": [ { "mode": 33188 } ] - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] + } }, "embeddings": [ -0.45703125, @@ -15574,18 +15594,20 @@ "element_id": "73b0874758fb74535ea6817963e50dc5", "text": "\"If they had known that you wished it, the entertainment would have been put off,\" said the prince, who, like a wound-up clock, by force of habit said things he did not even wish to be believed.", "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", "data_source": { - "url": "example-docs/book-war-and-peace-1p.txt", + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/book-war-and-peace-1p.txt" + }, "permissions_data": [ { "mode": 33188 } ] - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] + } }, "embeddings": [ -0.73046875, @@ -17131,18 +17153,20 @@ "element_id": "3b8e76f2baafa3482edb98626c6fd7aa", "text": "\"Don't tease! Well, and what has been decided about Novosiltsev's dispatch? You know everything.\"", "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", "data_source": { - "url": "example-docs/book-war-and-peace-1p.txt", + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/book-war-and-peace-1p.txt" + }, "permissions_data": [ { "mode": 33188 } ] - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] + } }, "embeddings": [ -0.5390625, @@ -18688,18 +18712,20 @@ "element_id": "fbc14cba30b1dc3c20bd0bcbb36d7de5", "text": "\"What can one say about it?\" replied the prince in a cold, listless tone. \"What has been decided? 
They have decided that Buonaparte has burnt his boats, and I believe that we are ready to burn ours.\"", "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", "data_source": { - "url": "example-docs/book-war-and-peace-1p.txt", + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/book-war-and-peace-1p.txt" + }, "permissions_data": [ { "mode": 33188 } ] - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] + } }, "embeddings": [ -1.125, diff --git a/test_unstructured_ingest/expected-structured-output/embed-vertexai/book-war-and-peace-1p.txt.json b/test_unstructured_ingest/expected-structured-output/embed-vertexai/book-war-and-peace-1p.json similarity index 99% rename from test_unstructured_ingest/expected-structured-output/embed-vertexai/book-war-and-peace-1p.txt.json rename to test_unstructured_ingest/expected-structured-output/embed-vertexai/book-war-and-peace-1p.json index 29a3088219..ecd67b5e42 100644 --- a/test_unstructured_ingest/expected-structured-output/embed-vertexai/book-war-and-peace-1p.txt.json +++ b/test_unstructured_ingest/expected-structured-output/embed-vertexai/book-war-and-peace-1p.json @@ -4,18 +4,20 @@ "element_id": "ecc6ecfdda0975d91546edf1cd407e86", "text": "CHAPTER I", "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", "data_source": { - "url": "example-docs/book-war-and-peace-1p.txt", + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/book-war-and-peace-1p.txt" + }, "permissions_data": [ { "mode": 33188 } ] - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] + } }, "embeddings": [ 0.03092249296605587, @@ -793,18 +795,20 @@ "element_id": "9ab2c6fc3fff6cedc83ffce2ffcc8705", "text": "\"Well, Prince, so Genoa and Lucca are now just family estates of the Buonapartes. 
But I warn you, if you don't tell me that this means war, if you still try to defend the infamies and horrors perpetrated by that Antichrist--I really believe he is Antichrist--I will have nothing more to do with you and you are no longer my friend, no longer my 'faithful slave,' as you call yourself! But how do you do? I see I have frightened you--sit down and tell me all the news.\"", "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", "data_source": { - "url": "example-docs/book-war-and-peace-1p.txt", + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/book-war-and-peace-1p.txt" + }, "permissions_data": [ { "mode": 33188 } ] - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] + } }, "embeddings": [ 0.011275566183030605, @@ -1582,18 +1586,20 @@ "element_id": "19fd6c4711db1634658ca1b582bbb282", "text": "It was in July, 1805, and the speaker was the well-known Anna Pavlovna Scherer, maid of honor and favorite of the Empress Marya Fedorovna. With these words she greeted Prince Vasili Kuragin, a man of high rank and importance, who was the first to arrive at her reception. Anna Pavlovna had had a cough for some days. She was, as she said, suffering from la grippe; grippe being then a new word in St. 
Petersburg, used only by the elite.", "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", "data_source": { - "url": "example-docs/book-war-and-peace-1p.txt", + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/book-war-and-peace-1p.txt" + }, "permissions_data": [ { "mode": 33188 } ] - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] + } }, "embeddings": [ -0.037365660071372986, @@ -2371,18 +2377,20 @@ "element_id": "9a29e46f349e568cbec4c13d6ac6d3e1", "text": "All her invitations without exception, written in French, and delivered by a scarlet-liveried footman that morning, ran as follows:", "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", "data_source": { - "url": "example-docs/book-war-and-peace-1p.txt", + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/book-war-and-peace-1p.txt" + }, "permissions_data": [ { "mode": 33188 } ] - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] + } }, "embeddings": [ -0.020052220672369003, @@ -3160,18 +3168,20 @@ "element_id": "e37f7a0354c187111dbbd76c353a8548", "text": "\"If you have nothing better to do, Count (or Prince), and if the prospect of spending an evening with a poor invalid is not too terrible, I shall be very charmed to see you tonight between 7 and 10--Annette Scherer.\"", "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", "data_source": { - "url": "example-docs/book-war-and-peace-1p.txt", + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/book-war-and-peace-1p.txt" + }, "permissions_data": [ { "mode": 33188 } ] - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] + } }, "embeddings": [ 0.008697886019945145, @@ -3949,18 +3959,20 @@ "element_id": "ff53e9e3e2c0a40c878f3117dfaee12e", "text": "\"Heavens! what a virulent attack!\" replied the prince, not in the least disconcerted by this reception. 
He had just entered, wearing an embroidered court uniform, knee breeches, and shoes, and had stars on his breast and a serene expression on his flat face. He spoke in that refined French in which our grandfathers not only spoke but thought, and with the gentle, patronizing intonation natural to a man of importance who had grown old in society and at court. He went up to Anna Pavlovna, kissed her hand, presenting to her his bald, scented, and shining head, and complacently seated himself on the sofa.", "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", "data_source": { - "url": "example-docs/book-war-and-peace-1p.txt", + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/book-war-and-peace-1p.txt" + }, "permissions_data": [ { "mode": 33188 } ] - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] + } }, "embeddings": [ -0.008844864554703236, @@ -4738,18 +4750,20 @@ "element_id": "944f331f0e9f276612e232b36f28b5f6", "text": "\"First of all, dear friend, tell me how you are. Set your friend's mind at rest,\" said he without altering his tone, beneath the politeness and affected sympathy of which indifference and even irony could be discerned.", "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", "data_source": { - "url": "example-docs/book-war-and-peace-1p.txt", + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/book-war-and-peace-1p.txt" + }, "permissions_data": [ { "mode": 33188 } ] - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] + } }, "embeddings": [ 0.02565869875252247, @@ -5527,18 +5541,20 @@ "element_id": "9ff1d8f7e93d526d0e3a174a51850ec8", "text": "\"Can one be well while suffering morally? Can one be calm in times like these if one has any feeling?\" said Anna Pavlovna. 
\"You are staying the whole evening, I hope?\"", "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", "data_source": { - "url": "example-docs/book-war-and-peace-1p.txt", + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/book-war-and-peace-1p.txt" + }, "permissions_data": [ { "mode": 33188 } ] - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] + } }, "embeddings": [ 0.010125050321221352, @@ -6316,18 +6332,20 @@ "element_id": "92ccca74969ed207c2b36cecb1b1b677", "text": "\"And the fete at the English ambassador's? Today is Wednesday. I must put in an appearance there,\" said the prince. \"My daughter is coming for me to take me there.\"", "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", "data_source": { - "url": "example-docs/book-war-and-peace-1p.txt", + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/book-war-and-peace-1p.txt" + }, "permissions_data": [ { "mode": 33188 } ] - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] + } }, "embeddings": [ -0.017792271450161934, @@ -7105,18 +7123,20 @@ "element_id": "fc189a009c8fc4507bd42d9601bdb566", "text": "\"I thought today's fete had been canceled. 
I confess all these festivities and fireworks are becoming wearisome.\"", "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", "data_source": { - "url": "example-docs/book-war-and-peace-1p.txt", + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/book-war-and-peace-1p.txt" + }, "permissions_data": [ { "mode": 33188 } ] - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] + } }, "embeddings": [ 0.031009025871753693, @@ -7894,18 +7914,20 @@ "element_id": "73b0874758fb74535ea6817963e50dc5", "text": "\"If they had known that you wished it, the entertainment would have been put off,\" said the prince, who, like a wound-up clock, by force of habit said things he did not even wish to be believed.", "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", "data_source": { - "url": "example-docs/book-war-and-peace-1p.txt", + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/book-war-and-peace-1p.txt" + }, "permissions_data": [ { "mode": 33188 } ] - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] + } }, "embeddings": [ -0.020816270262002945, @@ -8683,18 +8705,20 @@ "element_id": "3b8e76f2baafa3482edb98626c6fd7aa", "text": "\"Don't tease! Well, and what has been decided about Novosiltsev's dispatch? You know everything.\"", "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", "data_source": { - "url": "example-docs/book-war-and-peace-1p.txt", + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/book-war-and-peace-1p.txt" + }, "permissions_data": [ { "mode": 33188 } ] - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] + } }, "embeddings": [ 0.018143275752663612, @@ -9472,18 +9496,20 @@ "element_id": "fbc14cba30b1dc3c20bd0bcbb36d7de5", "text": "\"What can one say about it?\" replied the prince in a cold, listless tone. \"What has been decided? 
They have decided that Buonaparte has burnt his boats, and I believe that we are ready to burn ours.\"", "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", "data_source": { - "url": "example-docs/book-war-and-peace-1p.txt", + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/book-war-and-peace-1p.txt" + }, "permissions_data": [ { "mode": 33188 } ] - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] + } }, "embeddings": [ 0.01508738100528717, diff --git a/test_unstructured_ingest/expected-structured-output/embed/book-war-and-peace-1p.json b/test_unstructured_ingest/expected-structured-output/embed/book-war-and-peace-1p.json new file mode 100644 index 0000000000..f682747cfe --- /dev/null +++ b/test_unstructured_ingest/expected-structured-output/embed/book-war-and-peace-1p.json @@ -0,0 +1,5293 @@ +[ + { + "type": "Title", + "element_id": "ecc6ecfdda0975d91546edf1cd407e86", + "text": "CHAPTER I", + "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/book-war-and-peace-1p.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + }, + "embeddings": [ + -0.039053574204444885, + 0.06777482479810715, + 0.01095886342227459, + 0.001242638798430562, + -0.013691969215869904, + 0.05574340745806694, + 0.1145429015159607, + 0.025169963017106056, + -0.007307262159883976, + -0.043478596955537796, + 0.015188274905085564, + -0.0008902916451916099, + 0.08180288970470428, + -0.041000235825777054, + -0.014650901779532433, + -0.04903417453169823, + -0.010637535713613033, + -0.048637501895427704, + -0.05573456361889839, + -0.02101968228816986, + -0.02015926130115986, + 0.0817803367972374, + 0.022933680564165115, + 0.023277927190065384, + -0.06536835432052612, + 0.0042941696010529995, + 0.0013802351895719767, + -0.01676568016409874, + -0.039429422467947006, + -0.16493840515613556, + 
-0.0018060479778796434, + 0.04371352866292, + -0.030743760988116264, + -0.03578191623091698, + 0.007972940802574158, + -0.010520529933273792, + 0.08600007742643356, + 0.0008449776796624064, + 0.0691617876291275, + 0.011546935886144638, + -0.04101349413394928, + -0.02445390820503235, + -0.00878186896443367, + -0.0012528146617114544, + 0.032106123864650726, + -0.05179746076464653, + 0.020187532529234886, + 0.010731157846748829, + 0.035490721464157104, + -0.006568704731762409, + -0.012302296236157417, + 0.015625305473804474, + -0.07307188957929611, + 0.05225848779082298, + 0.005438334308564663, + 0.10789936780929565, + 0.029409075155854225, + -0.01955239474773407, + 0.06039135903120041, + 0.024580268189311028, + 0.006660499144345522, + 0.014496345072984695, + -0.059024378657341, + 0.018884191289544106, + 0.053682923316955566, + -0.04883323609828949, + -0.03478892147541046, + -0.035947706550359726, + -0.029253877699375153, + -0.005442352034151554, + -0.05627904459834099, + 0.005595133174210787, + -0.06534703075885773, + -0.06594320386648178, + -0.023416608572006226, + 0.008515028282999992, + 0.05723067745566368, + -0.02647353895008564, + 0.08162931352853775, + -0.08124542981386185, + -0.08059799671173096, + -0.02699224092066288, + 0.008641940541565418, + -0.05588868260383606, + -0.009593123570084572, + 0.08471684157848358, + 0.012107349932193756, + -0.028975313529372215, + -0.0074949609115719795, + 0.03564765676856041, + -0.047048844397068024, + -0.07886865735054016, + 0.09840915352106094, + 0.006313779857009649, + -0.05892641842365265, + 0.0418110117316246, + 0.005335851572453976, + -0.016099577769637108, + 0.05092310905456543, + 0.22539611160755157, + -0.006117882672697306, + -0.029893962666392326, + 0.006087993737310171, + -0.03947218134999275, + -0.08597657084465027, + -0.027387086302042007, + -0.03198559954762459, + -0.04598864167928696, + -0.014253820292651653, + -0.04349171742796898, + -0.06363191455602646, + -0.034045107662677765, + -0.007654491811990738, + 
0.0038879921194165945, + 0.06429543346166611, + 0.016424639150500298, + 0.13503171503543854, + 0.02015267312526703, + 0.05455942451953888, + 0.012581412680447102, + 0.031854335218667984, + 0.02650105021893978, + 0.018824096769094467, + 0.10952351242303848, + -0.08162876963615417, + -0.039606720209121704, + -0.0032470314763486385, + -4.269090588339304e-33, + 0.061855118721723557, + -0.08925724774599075, + 0.013858199119567871, + 0.014075208455324173, + 0.04526891931891441, + 0.00249670073390007, + 0.019853856414556503, + 0.0409410186111927, + -0.035249315202236176, + 4.28669445682317e-05, + -0.06601119041442871, + -0.003698282642289996, + -0.06800548732280731, + -0.0440298393368721, + 0.05726302042603493, + -0.004475518595427275, + 0.018038572743535042, + 0.005330387968569994, + -0.09185048192739487, + -0.01272667571902275, + 0.037934865802526474, + 0.03344474732875824, + 0.03331829234957695, + -0.060712192207574844, + -0.013047875836491585, + 0.020851701498031616, + -0.004250629805028439, + -0.05810995399951935, + -0.015378184616565704, + 0.07314842939376831, + 0.030952412635087967, + 0.013711050152778625, + -0.054110776633024216, + -0.1204676628112793, + -0.02648378349840641, + -0.01370607502758503, + -0.009665010496973991, + -0.027702469378709793, + 0.06314342468976974, + 0.019671481102705002, + -0.08084968477487564, + -0.0036577791906893253, + 0.017928795889019966, + 0.03003622032701969, + 0.04298318922519684, + 0.08673412352800369, + 0.13455118238925934, + 0.027358228340744972, + -0.0325857438147068, + -0.010898883454501629, + -0.0070188287645578384, + -0.02833310142159462, + -0.026672331616282463, + 0.022378331050276756, + -0.0057768188416957855, + 0.02410115860402584, + -0.04932824894785881, + -0.007980610243976116, + 0.021779432892799377, + 0.05339273065328598, + 0.048314474523067474, + 0.10402951389551163, + 0.008860395289957523, + 0.03752505034208298, + -0.07240661233663559, + -0.00011432449537096545, + 0.020175516605377197, + -0.06184636428952217, + 
0.05961461737751961, + -0.05503392219543457, + -0.16565214097499847, + -0.027707360684871674, + 0.09743264317512512, + -0.00033638786408118904, + 0.056544337421655655, + -0.05199439823627472, + -0.03701367974281311, + 0.059286829084157944, + -0.1309632956981659, + 0.016191326081752777, + -0.057364895939826965, + -0.03829443082213402, + -0.07643566280603409, + 0.039539702236652374, + 0.05474052205681801, + -0.0031980290077626705, + 0.05783208832144737, + -0.05534063279628754, + -0.006174846086651087, + -0.0639599934220314, + 0.036954332143068314, + -0.004816036205738783, + 0.08334646373987198, + 0.040309593081474304, + 0.004848731216043234, + 1.546801608507362e-33, + -0.010510774329304695, + -0.11877679079771042, + -0.02399861440062523, + -0.04503890872001648, + 0.019606268033385277, + -0.0183505117893219, + -0.02668437361717224, + -0.010227954015135765, + -0.06658010929822922, + 0.014446600340306759, + -0.10882732272148132, + 0.017114970833063126, + 0.01623861864209175, + 0.0640726312994957, + 0.08058273047208786, + 0.01955559104681015, + 0.04779678210616112, + -0.01391725055873394, + -0.04839383065700531, + -0.006132079754024744, + -0.023716801777482033, + -0.010434424504637718, + -0.007593447808176279, + -0.04394460842013359, + 0.024425875395536423, + 0.00040804315358400345, + 0.026529528200626373, + 0.03431902453303337, + -0.005246742162853479, + 0.03659745305776596, + 0.01670682244002819, + -0.03680446371436119, + -0.043138571083545685, + -0.005520101170986891, + -0.029772009700536728, + 0.009255989454686642, + 0.03093816712498665, + -0.038504958152770996, + 0.007130167447030544, + 0.0025368970818817616, + 0.0879099890589714, + -0.03878892585635185, + 0.019288672134280205, + 0.09237164258956909, + -0.017960863187909126, + -0.0156407468020916, + 0.07834358513355255, + 0.03496071323752403, + -0.056599754840135574, + 0.04549145698547363, + -0.060227882117033005, + -0.07567070424556732, + 0.043599169701337814, + -0.0833909809589386, + 0.013377184979617596, + 
0.022528760135173798, + -0.02521529793739319, + -0.02478679083287716, + 0.02125546522438526, + 0.03799215704202652, + -0.016987860202789307, + 0.062367551028728485, + -0.07540281862020493, + 0.08831459283828735, + 0.0662769004702568, + -0.06869129836559296, + -0.032368242740631104, + -0.08423380553722382, + 0.03145943954586983, + 0.032238345593214035, + -0.06467296183109283, + -0.04131615161895752, + 0.00870375894010067, + 0.07175710797309875, + 0.017684906721115112, + -0.028763344511389732, + -0.11189687252044678, + -0.006346574053168297, + -0.10585834830999374, + -0.0904373824596405, + -0.0827941820025444, + -0.04315425083041191, + -0.032436370849609375, + 0.03468124195933342, + 0.030602777376770973, + -0.013560143299400806, + 0.09569393843412399, + 0.0010198664385825396, + 0.04504832997918129, + 0.009828410111367702, + 0.020692016929388046, + 0.053285591304302216, + 0.0563117191195488, + 0.0005695075378753245, + 0.027328839525580406, + -1.4383413571295023e-08, + 0.016748789697885513, + 0.06547888368368149, + 0.07251796871423721, + 0.034017667174339294, + 0.01671898551285267, + 0.027632959187030792, + -0.018502725288271904, + 0.042066700756549835, + -0.02497069537639618, + 0.05260665342211723, + 0.055694546550512314, + 0.048802848905324936, + 0.10564486682415009, + 0.0568138025701046, + 0.017104096710681915, + 0.07104688137769699, + -0.0012886116746813059, + -0.008612019009888172, + -0.026398880407214165, + -0.027114199474453926, + 0.05297686904668808, + 0.013118482194840908, + 0.01929987408220768, + -0.1022021546959877, + -0.027542220428586006, + 0.027875332161784172, + -0.036229029297828674, + 0.10509010404348373, + -0.019843457266688347, + 0.030714411288499832, + 0.026830829679965973, + 0.07362941652536392, + -0.12515154480934143, + -0.03394259139895439, + -0.008360006846487522, + 0.030580025166273117, + 0.016863664612174034, + 0.01787250116467476, + -0.0002851359313353896, + -0.09362590312957764, + 0.020578758791089058, + -0.006355518940836191, + 
0.07356223464012146, + 0.019790606573224068, + 0.002965264953672886, + 0.02446441911160946, + 0.0096715297549963, + 0.0164011362940073, + 0.04246218502521515, + -0.060157183557748795, + -0.07367903739213943, + 0.03363671153783798, + 0.02566555142402649, + 0.07107982039451599, + 0.013282394036650658, + 0.05232807993888855, + -0.004840012174099684, + -0.025187980383634567, + -0.07005799561738968, + -0.011108246631920338, + 0.11634697020053864, + -0.0358431451022625, + -0.06202442944049835, + -0.08774126321077347 + ] + }, + { + "type": "NarrativeText", + "element_id": "9ab2c6fc3fff6cedc83ffce2ffcc8705", + "text": "\"Well, Prince, so Genoa and Lucca are now just family estates of the Buonapartes. But I warn you, if you don't tell me that this means war, if you still try to defend the infamies and horrors perpetrated by that Antichrist--I really believe he is Antichrist--I will have nothing more to do with you and you are no longer my friend, no longer my 'faithful slave,' as you call yourself! But how do you do? 
I see I have frightened you--sit down and tell me all the news.\"", + "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/book-war-and-peace-1p.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + }, + "embeddings": [ + -0.05874714255332947, + 0.10656474530696869, + 0.03050887957215309, + -0.024821626022458076, + -0.0244454275816679, + 0.060092151165008545, + 0.051419422030448914, + -0.08806683123111725, + -0.06982982158660889, + -0.10064217448234558, + -0.050494249910116196, + -0.0856509655714035, + -0.001889734878204763, + -0.06068073585629463, + -0.07801132649183273, + 0.02243422158062458, + -0.02127995900809765, + 0.013246431946754456, + 0.007391561288386583, + 0.15346582233905792, + 0.010216226801276207, + -0.03732678294181824, + 0.02913215011358261, + 0.1313096135854721, + 0.033914972096681595, + 0.07146815955638885, + 0.06141148880124092, + 0.06220632418990135, + -0.038903698325157166, + 0.02126525714993477, + 0.024463649839162827, + -0.008229871280491352, + 0.0019652098417282104, + -0.022251710295677185, + 0.04346247389912605, + -0.02431749552488327, + 0.06304806470870972, + -0.0032200270798057318, + 0.04085702449083328, + -0.061382830142974854, + 0.07564105093479156, + 0.03557412698864937, + 0.05523449555039406, + 0.12190327793359756, + 0.0015077260322868824, + -0.0056305257603526115, + -0.012865928933024406, + 0.04743355140089989, + 0.03452764451503754, + -0.07475459575653076, + -0.0702810287475586, + -0.03263530135154724, + 0.009802860207855701, + -0.030406644567847252, + -0.07674538344144821, + 0.12278791517019272, + 0.006970102433115244, + -0.04915524646639824, + 0.032304130494594574, + -0.008575448766350746, + -0.008731338195502758, + 0.06175641342997551, + 0.02136044204235077, + 0.06800870597362518, + -0.05496224761009216, + 0.03136278688907623, + -0.0031887914519757032, + 0.09498440474271774, + 
-0.15730291604995728, + 0.08546384423971176, + 0.05568832531571388, + -0.018731610849499702, + 0.03184361383318901, + -0.03811872750520706, + -0.032170187681913376, + -0.033344849944114685, + 0.056314561516046524, + -0.06807486712932587, + 0.012875617481768131, + -0.05491160973906517, + -0.009411911480128765, + -0.06047646328806877, + -0.0050972094759345055, + 0.03667799383401871, + -0.017359159886837006, + -0.043124936521053314, + 0.13297699391841888, + -0.05144326016306877, + 0.07867272198200226, + 0.004182144999504089, + 0.011900841258466244, + 0.01292786467820406, + 0.01813146471977234, + 0.05150924623012543, + -0.03438987955451012, + 0.03820415958762169, + 0.005536296404898167, + 0.10068918764591217, + -0.06529213488101959, + 0.049779098480939865, + 0.00473055150359869, + -0.05184305086731911, + 0.051430843770504, + 0.01334310881793499, + -0.06723044067621231, + -0.0021935193799436092, + -0.037461258471012115, + 0.0004998851800337434, + -0.004775731358677149, + -0.08317895233631134, + 0.00939151830971241, + -0.0628887340426445, + -0.01943228766322136, + -0.020009055733680725, + -0.027380872517824173, + 0.04080546647310257, + 0.09818447381258011, + 0.0047019897028803825, + -0.06036354973912239, + -0.0030813238117843866, + 0.0446375347673893, + 0.03669815883040428, + -0.05477478355169296, + 0.09709606319665909, + 0.0022707011085003614, + -0.019855158403515816, + -0.04361126944422722, + -2.564797165063733e-33, + -0.005204022396355867, + -0.034174852073192596, + -0.000351788941770792, + 0.0342654250562191, + -0.035254135727882385, + 0.034352824091911316, + -0.07474210113286972, + -0.01232664193958044, + -0.05785913020372391, + -0.04832305386662483, + -0.024462489411234856, + -0.007020951248705387, + -0.03689669445157051, + 0.043854836374521255, + -0.033271558582782745, + 0.04244591295719147, + -0.058089204132556915, + -0.02207816392183304, + 0.027183154597878456, + -0.07541303336620331, + 0.032816559076309204, + 0.0015269170980900526, + -0.06739696860313416, + 
-0.0010412997798994184, + 0.0195449348539114, + 0.030106471851468086, + 0.08410672098398209, + 0.029623055830597878, + 0.04990166053175926, + 0.058037422597408295, + 0.027960408478975296, + 0.03548738732933998, + 0.04623204097151756, + -0.04980506747961044, + 0.016254911199212074, + 0.0800669863820076, + -0.07473579794168472, + -0.02717609703540802, + -0.058577761054039, + -0.018578259274363518, + -0.048805948346853256, + 0.032568030059337616, + -0.08259385824203491, + 0.023284832015633583, + 0.00024890073109418154, + -0.1252095252275467, + 0.017054660245776176, + -0.015016729943454266, + 0.02628587931394577, + -0.0034273015335202217, + 0.011149099096655846, + -0.07744869589805603, + -0.06510678678750992, + 0.054695598781108856, + 0.026935169473290443, + -0.028602445498108864, + -0.10690239816904068, + 0.07598371803760529, + 0.04779389873147011, + -0.06476539373397827, + 0.08826077729463577, + -0.05912713333964348, + 0.024526655673980713, + 0.09490557014942169, + -0.013721351511776447, + -0.029740087687969208, + 0.00040637634810991585, + 0.06746294349431992, + -0.003036905312910676, + -0.07345454394817352, + -0.027896616607904434, + -0.06864512711763382, + 0.009670508094131947, + 0.010829431004822254, + -0.05995146557688713, + -0.001629870617762208, + -0.019092809408903122, + 0.007018237374722958, + -0.0325867161154747, + -0.03575168922543526, + -0.039596933871507645, + -0.003733622143045068, + 0.03977552801370621, + 0.07186185568571091, + 0.05589253455400467, + -0.018120281398296356, + 0.07676935940980911, + -0.01822086051106453, + -0.014926361851394176, + 0.04769439622759819, + -0.03230486065149307, + 0.04005064442753792, + 0.053353749215602875, + -0.028874794021248817, + -0.12175753712654114, + -5.007500782859844e-34, + 0.005682002753019333, + -0.051021382212638855, + 0.04206480458378792, + -0.014440439641475677, + -0.11980569362640381, + -0.049977440387010574, + -0.028303442522883415, + 0.031310953199863434, + 0.04197293892502785, + 0.009160526096820831, + 
-0.06150886043906212, + -0.010343263857066631, + 0.06027752161026001, + -0.061735011637210846, + 0.015110868960618973, + -0.004956221207976341, + 0.12118169665336609, + 0.020087195560336113, + -0.07931522279977798, + -0.019512474536895752, + -0.05346915125846863, + 0.006304183974862099, + -0.05663385987281799, + 0.026090450584888458, + 0.007936645299196243, + -0.01273948885500431, + 0.0850648581981659, + 0.02650691196322441, + -0.0325193889439106, + -0.0995645821094513, + -0.023561470210552216, + -0.01348517183214426, + -0.05534840375185013, + 0.0048078917898237705, + 0.01003493182361126, + 0.0803077220916748, + 0.04051421955227852, + -0.0031254051718860865, + 0.049714166671037674, + -0.031183114275336266, + -0.05248817428946495, + -0.056404102593660355, + -0.10136302560567856, + -0.011093150824308395, + -0.06251668930053711, + 0.03959472104907036, + 0.06921857595443726, + 0.008909943513572216, + 0.04276519641280174, + 0.008871695026755333, + -0.006893877871334553, + -0.03439163416624069, + -0.011620843783020973, + -0.027362683787941933, + -0.043699078261852264, + -0.12517930567264557, + -0.03718714416027069, + -0.04610683023929596, + -0.015714460983872414, + -0.01418638788163662, + 0.01614246889948845, + 0.03098599798977375, + -0.029197778552770615, + 0.01756308227777481, + 0.020264318212866783, + -0.0013386603677645326, + -0.08551283180713654, + 0.03466145694255829, + 0.0629744604229927, + 0.014522727578878403, + -0.018858881667256355, + 0.02214154414832592, + -0.15967164933681488, + 0.038297515362501144, + 0.0016653237398713827, + 0.04812467843294144, + -0.05668596178293228, + -0.10623833537101746, + 0.0020920138340443373, + -0.016548091545701027, + 0.07570970803499222, + -0.08482731878757477, + 0.013707313686609268, + 0.05509072542190552, + 0.0012867775512859225, + -0.07505054026842117, + 0.040880072861909866, + 0.026234952732920647, + 0.011179892346262932, + 0.039860762655735016, + -0.042105864733457565, + -0.050777290016412735, + 0.03062119521200657, + 
-0.06969141215085983, + 0.03848576545715332, + -5.153237125909982e-08, + 0.10533054172992706, + -0.01505187340080738, + 0.04974827170372009, + 0.003194242250174284, + -0.0502471998333931, + -0.0004891534917987883, + -0.055042117834091187, + 0.008238887414336205, + 0.04313642159104347, + 0.0462891049683094, + 0.04256117343902588, + -0.03983268514275551, + 0.0578257255256176, + -0.03386905789375305, + -0.02273518778383732, + 0.08741379529237747, + 0.0051943217404186726, + -0.09908322989940643, + -0.029871750622987747, + -0.057004544883966446, + -0.049455802887678146, + 0.04676390811800957, + 0.027027742937207222, + -0.025994017720222473, + 0.02325207181274891, + -0.010954251512885094, + -0.009121127426624298, + -0.04941120743751526, + -0.03576080501079559, + -0.002564789028838277, + 0.038502324372529984, + -0.03570831939578056, + -0.07853586226701736, + 0.02988108992576599, + -0.03232889249920845, + 0.0753493383526802, + 0.029644900932908058, + -0.0054330299608409405, + 0.0780261754989624, + -0.039111267775297165, + 0.028155505657196045, + 0.05981967970728874, + 0.005577471107244492, + 0.03696586564183235, + 0.022851616144180298, + -0.04101957753300667, + 0.011825652793049812, + -0.03328275308012962, + -0.01723749190568924, + 0.009173493832349777, + 0.008293936029076576, + -0.00026000005891546607, + 0.03159288316965103, + 0.03948487341403961, + -0.012713965028524399, + 0.015469718724489212, + -0.0009076553978957236, + 0.10846077650785446, + -0.03717157989740372, + 0.08666627109050751, + 0.04925926774740219, + -0.013702018186450005, + -0.048068877309560776, + -0.04492269828915596 + ] + }, + { + "type": "NarrativeText", + "element_id": "19fd6c4711db1634658ca1b582bbb282", + "text": "It was in July, 1805, and the speaker was the well-known Anna Pavlovna Scherer, maid of honor and favorite of the Empress Marya Fedorovna. With these words she greeted Prince Vasili Kuragin, a man of high rank and importance, who was the first to arrive at her reception. 
Anna Pavlovna had had a cough for some days. She was, as she said, suffering from la grippe; grippe being then a new word in St. Petersburg, used only by the elite.", + "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/book-war-and-peace-1p.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + }, + "embeddings": [ + -0.05423668771982193, + 0.054445262998342514, + 0.020339174196124077, + 0.08231724053621292, + -0.07275068759918213, + 0.09235041588544846, + 0.06840193271636963, + 0.013299662619829178, + -0.015014569275081158, + -0.05566733330488205, + -0.05068721994757652, + 0.0242469422519207, + -0.014591077342629433, + -0.0486467070877552, + -0.09186422824859619, + 0.013159358873963356, + -0.0007340788142755628, + 0.023640664294362068, + 0.057755883783102036, + 0.07107026129961014, + -0.03849221393465996, + 0.021858634427189827, + 0.019213736057281494, + 0.04542824998497963, + 0.027777139097452164, + 0.01152915507555008, + -0.05850802734494209, + -0.027106154710054398, + 0.03341171517968178, + 0.03780396655201912, + -0.048809442669153214, + -0.02951604127883911, + 0.024384213611483574, + 0.08260197192430496, + -0.010294260457158089, + 0.024427786469459534, + 0.0046907407231628895, + 0.042408570647239685, + 0.007938776165246964, + 0.035158898681402206, + -0.014480923302471638, + -0.08043234050273895, + -0.01727035455405712, + -0.039487097412347794, + 0.013404544442892075, + -0.025633588433265686, + -0.08467279374599457, + 0.09215318411588669, + 0.0550311878323555, + 0.00045374719775281847, + 0.004816462751477957, + 0.01783987507224083, + -0.03210141509771347, + -0.04114364832639694, + 0.0556008517742157, + -0.02144431322813034, + 0.021150898188352585, + 0.0022396636195480824, + 0.009305122308433056, + 0.03832445666193962, + -0.027697503566741943, + 0.017343325540423393, + 0.08889875560998917, + 0.05170465633273125, + 
-0.03271825611591339, + -0.10792192071676254, + 0.06509971618652344, + 0.012057877145707607, + 0.048848144710063934, + 0.07896538078784943, + -0.024069225415587425, + 0.07106571644544601, + 0.00717023853212595, + -0.04288620129227638, + -0.04381057992577553, + -0.019540946930646896, + -0.0006824576412327588, + -0.023883521556854248, + -0.013316082768142223, + 0.01220389548689127, + -0.0234786719083786, + -0.031126664951443672, + -0.03533963859081268, + 0.06731270253658295, + -0.06599041074514389, + -0.05461878329515457, + -0.019474947825074196, + -0.044913195073604584, + 0.0263291597366333, + -0.041479311883449554, + 0.00025415379786863923, + -0.04920981824398041, + -0.05709255859255791, + 0.03583342954516411, + -0.016211632639169693, + -0.04054819792509079, + -0.106109119951725, + 0.00895263534039259, + -0.00949755311012268, + 0.02762090601027012, + 0.007005187217146158, + 0.0415591299533844, + -0.008658170700073242, + 0.02714497782289982, + -0.036862097680568695, + 0.023691613227128983, + 0.0018037331756204367, + -0.1432705968618393, + -0.08328872919082642, + 0.01839425228536129, + -0.005625145509839058, + -0.06089228391647339, + 0.06296160817146301, + -0.10043159872293472, + -0.017577864229679108, + 0.11918678134679794, + -0.04518730193376541, + -0.061427466571331024, + 0.013173188082873821, + -0.05008642375469208, + -0.026333415880799294, + -0.08440456539392471, + -0.05948567017912865, + 0.04684460535645485, + -0.020805275067687035, + 0.010918369516730309, + 0.06306127458810806, + -1.833076222165593e-34, + 0.02366311475634575, + -0.021635068580508232, + 0.09398184716701508, + 0.06558018177747726, + 0.02811088040471077, + -0.02855352871119976, + 0.025134075433015823, + -0.05164063349366188, + -0.021254729479551315, + -0.02356044575572014, + 0.041061729192733765, + -0.03740977123379707, + -0.03663377836346626, + -0.13780520856380463, + -0.003202243009582162, + 0.02822616882622242, + -0.13028380274772644, + 0.07633689790964127, + 0.017309321090579033, + 
0.07765893638134003, + 0.07021743804216385, + 0.015129951760172844, + -0.03260749951004982, + 0.07045388966798782, + -0.02885528653860092, + 0.03854513540863991, + 0.041499242186546326, + -0.020848512649536133, + 0.05232222378253937, + -0.00822402723133564, + 0.005626597907394171, + 0.01391650177538395, + 0.0328478068113327, + -0.0043826536275446415, + -0.015786724165081978, + -0.051043801009655, + 0.04193836823105812, + -0.06510256230831146, + -0.012801111675798893, + -0.006315520964562893, + 0.05624018609523773, + -0.060843098908662796, + 0.09915226697921753, + -0.01780638098716736, + -0.06444515287876129, + -0.0134547995403409, + -0.07744941115379333, + -0.019391261041164398, + 0.08984300494194031, + -0.09032350778579712, + -0.042681071907281876, + -0.05032941326498985, + -0.0047477837651968, + 0.14171956479549408, + -0.01961122639477253, + 0.03920887038111687, + 0.058255866169929504, + 0.04776240140199661, + 0.09023989737033844, + -0.044679608196020126, + -0.05505826696753502, + -0.007849724031984806, + 0.03222421184182167, + -0.012056529521942139, + 0.09152241051197052, + -0.07704488188028336, + -0.03984208405017853, + 0.051285307854413986, + -0.041155312210321426, + 0.00780333811417222, + 0.042445629835128784, + 0.0681518018245697, + -0.0684749186038971, + -0.028895793482661247, + 0.05384024977684021, + 0.03684701770544052, + -0.008438502438366413, + 0.016927160322666168, + -0.036253128200769424, + 0.015754181891679764, + -0.0026881759986281395, + -0.021143684163689613, + -0.0036495632957667112, + 0.10435760766267776, + -0.07923290878534317, + -0.06165701523423195, + 0.013717937283217907, + 0.004878682550042868, + 0.0072635323740541935, + 0.06831807643175125, + -0.0740785151720047, + 0.06240532174706459, + 0.054031506180763245, + -0.031275924295186996, + -0.12862621247768402, + -2.122634046646968e-33, + 0.0013190781464800239, + 0.029269909486174583, + 0.02589591220021248, + 0.12991963326931, + 0.024839188903570175, + 0.019481269642710686, + 
-0.061224259436130524, + 0.05613133683800697, + -0.07899624854326248, + -0.05418183654546738, + 0.04206905514001846, + -0.027046604081988335, + 0.03789372369647026, + 0.035657670348882675, + 0.011533142067492008, + 0.06362226605415344, + 0.03290103003382683, + 0.06732577830553055, + -0.022412290796637535, + 0.028121434152126312, + -0.010809613391757011, + 0.007977122440934181, + -0.02378970943391323, + -0.015545660629868507, + 0.012052400037646294, + -0.0003049416118301451, + 0.13803179562091827, + -0.049637001007795334, + -0.13836205005645752, + 0.03576039895415306, + 0.02836824767291546, + 0.01847919076681137, + -0.11063776165246964, + 0.05067222937941551, + 0.014886993914842606, + -0.0036956777330487967, + 0.04480913281440735, + 0.02217531017959118, + 0.019430911168456078, + 0.004540923982858658, + -0.002111421199515462, + -0.005595929455012083, + 0.0156396571546793, + 0.04120778664946556, + 0.08499997109174728, + -0.04632548242807388, + -0.04556259140372276, + -0.013389877043664455, + 0.08864812552928925, + 0.024622570723295212, + -0.05871438607573509, + 0.013165379874408245, + -0.008856141939759254, + 0.026293974369764328, + 0.004199484828859568, + 0.013887232169508934, + -0.01239768322557211, + -0.0915493443608284, + 0.07278922200202942, + 0.006452771369367838, + -0.021723298355937004, + -0.019711915403604507, + -0.10501731187105179, + -0.12632641196250916, + -0.011150214821100235, + -0.023831907659769058, + -0.015914227813482285, + 0.06242978200316429, + 0.004688833374530077, + -0.05650947988033295, + 0.004797110799700022, + 0.0024531856179237366, + 0.03858102485537529, + 0.12522628903388977, + 0.04028625041246414, + 0.001992783509194851, + 0.016523126512765884, + -0.03520103171467781, + 0.013053569942712784, + -0.014520098455250263, + -0.027132712304592133, + -0.009017284028232098, + -0.013958512805402279, + -0.07057653367519379, + 0.03341757506132126, + 0.06892044842243195, + -0.00747356191277504, + -0.05211004242300987, + -0.019589446485042572, + 
-0.007827975787222385, + -0.021493107080459595, + 0.00322734541259706, + 0.04853278771042824, + -0.09270545840263367, + -0.07592537999153137, + -4.3007492678270864e-08, + -0.004582814406603575, + 0.0355641171336174, + -0.0002406010462436825, + -0.07003436982631683, + 0.025125637650489807, + -0.11407534778118134, + -0.011003648862242699, + -0.07727203518152237, + -0.03674670308828354, + -0.023768596351146698, + -0.07787062972784042, + -0.026298191398382187, + 0.07306752353906631, + -0.02506394498050213, + 0.10978511720895767, + -0.027902876958251, + 0.02096393145620823, + 0.004359286278486252, + -0.060212668031454086, + -0.07803106307983398, + 0.06699904799461365, + -0.024545857682824135, + 0.025769203901290894, + -0.088640958070755, + -0.05483923479914665, + 0.0686887577176094, + 0.018014902248978615, + -0.04789518937468529, + 0.04662582278251648, + -0.0233437679708004, + -0.03775409609079361, + 0.08953217417001724, + -0.08868105709552765, + -0.023651696741580963, + 0.018190694972872734, + 0.11000271141529083, + -0.013920513913035393, + -0.08008898049592972, + 0.05862165614962578, + -0.017713716253638268, + 0.006103368476033211, + 0.01848137006163597, + 0.00699786888435483, + -0.02579723298549652, + 0.011212329380214214, + 0.08030985295772552, + 0.0012547301594167948, + -0.062188733369112015, + 0.013699095696210861, + 0.04872715845704079, + -0.019186237826943398, + 0.06085670739412308, + 0.026860391721129417, + 0.07354810833930969, + -0.030469724908471107, + -0.005873874295502901, + 0.04735935479402542, + -0.007555138319730759, + 0.03952137380838394, + -0.02915903739631176, + -0.03052380122244358, + -0.0025960146449506283, + -0.048402171581983566, + 0.061455853283405304 + ] + }, + { + "type": "NarrativeText", + "element_id": "9a29e46f349e568cbec4c13d6ac6d3e1", + "text": "All her invitations without exception, written in French, and delivered by a scarlet-liveried footman that morning, ran as follows:", + "metadata": { + "languages": [ + "eng" + ], + "filetype": 
"text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/book-war-and-peace-1p.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + }, + "embeddings": [ + 0.002359570236876607, + 0.047255560755729675, + 0.07864280790090561, + 0.02816120907664299, + 0.052525997161865234, + 0.04893220216035843, + -0.03865951672196388, + -0.043158188462257385, + -0.03542988747358322, + 0.049656081944704056, + -0.012053770013153553, + 0.0163655336946249, + 0.013249950483441353, + -0.07936282455921173, + -0.08082809299230576, + 0.015303845517337322, + -0.004346832633018494, + -0.05749057978391647, + 0.020369473844766617, + 0.0705939307808876, + -0.028021909296512604, + 0.024810347706079483, + 0.13321217894554138, + 0.06980545818805695, + -0.01894271932542324, + -0.060627516359090805, + -0.013086944818496704, + 0.007532228250056505, + 0.004013232886791229, + -0.009438964538276196, + -0.042332105338573456, + 0.0913439467549324, + 0.015961814671754837, + 0.036890555173158646, + 0.08075908571481705, + -0.0018513047834858298, + 0.09394267946481705, + -0.002426315099000931, + -0.04186234995722771, + 0.04612881690263748, + -0.023436415940523148, + -0.09251978993415833, + -0.030521534383296967, + 0.04476381465792656, + -0.04440254718065262, + -0.03522656112909317, + -0.026184484362602234, + 0.029133252799510956, + -0.08911262452602386, + 0.00647485489025712, + 0.02180786430835724, + 0.05243554711341858, + -0.08674845844507217, + -0.057368408888578415, + 0.012563894502818584, + 0.0019192359177395701, + 0.019626090303063393, + -0.02022009901702404, + 0.04051608964800835, + -0.0016887435922399163, + -0.004476838745176792, + 0.04316244274377823, + -0.07061455398797989, + 0.014945793896913528, + -0.024342583492398262, + -0.07555877417325974, + -0.02488061413168907, + 0.05288992077112198, + -0.014725971035659313, + 0.0817783921957016, + 0.040861308574676514, + 0.025976786389946938, + -0.0007990679005160928, + 
0.048380907624959946, + -0.01643071509897709, + 0.0304475836455822, + 0.021219527348876, + -0.028680991381406784, + 0.00588693842291832, + 0.034933902323246, + -0.12274087220430374, + -0.1063542291522026, + 0.03752432391047478, + 0.06280914694070816, + 0.04837187007069588, + -0.005929626990109682, + -0.011176642961800098, + -0.0068588959984481335, + 0.07332990318536758, + 0.010608839802443981, + -0.14890243113040924, + -0.05217853561043739, + 0.03172331675887108, + 0.008842132054269314, + -0.036021746695041656, + -0.0007704512681812048, + 0.00022352054656948894, + 0.01422717422246933, + -0.024083197116851807, + 0.03981217369437218, + 0.019911538809537888, + 0.0943899154663086, + 0.0020268652588129044, + 0.048203468322753906, + -0.013482272624969482, + -0.07065846025943756, + 0.014181027188897133, + -0.05876666307449341, + 0.003275815164670348, + -0.04369194433093071, + 0.03385254740715027, + -0.043839287012815475, + 0.06243090704083443, + -0.0298730731010437, + -0.013840597122907639, + 0.07008213549852371, + -0.03560657054185867, + -0.06703665107488632, + 0.02944488637149334, + 0.05646831914782524, + -0.03390175476670265, + 0.07127062976360321, + 0.01649673469364643, + -0.02693614549934864, + -0.07139120250940323, + -0.07321657985448837, + 0.07671328634023666, + -4.456809321983946e-33, + 0.023896191269159317, + 0.09456683695316315, + 7.631217158632353e-05, + 0.0020719633903354406, + 0.10064509510993958, + 0.07494156807661057, + -0.02089782804250717, + 0.01533470582216978, + 0.03034726157784462, + -0.0058396682143211365, + -0.022854655981063843, + -0.03741896152496338, + 0.03622424602508545, + 0.045642293989658356, + -0.10225757956504822, + 0.053572725504636765, + 0.09479650110006332, + 0.06655433028936386, + 0.008523122407495975, + 0.02390572801232338, + 0.1307642161846161, + 0.004884455353021622, + 0.009571892209351063, + 0.004908605013042688, + 0.008563573472201824, + 0.03072134032845497, + 0.03431534394621849, + 0.06559021770954132, + 0.03817035257816315, + 
0.03405508026480675, + -0.010022249072790146, + -0.05784042924642563, + 0.08795295655727386, + -0.017935728654265404, + 0.04056176170706749, + -0.010452117770910263, + -0.0043557616882026196, + -0.07645668834447861, + 0.017456594854593277, + 0.029208840802311897, + -0.03892463073134422, + -0.03733145818114281, + 0.03961646184325218, + -0.03340447321534157, + -0.14091329276561737, + 0.05262599512934685, + -0.02229633554816246, + -0.04128557816147804, + 0.10736940056085587, + 0.0219231266528368, + -0.07073383778333664, + 0.025094568729400635, + 0.03249846026301384, + 0.041703153401613235, + 0.021527951583266258, + -0.007863311097025871, + 0.004558945540338755, + -0.047509558498859406, + 0.016495579853653908, + -0.08661267906427383, + 0.08075810968875885, + 0.04040290787816048, + -0.05833826959133148, + 0.013957316055893898, + 0.03553760051727295, + -0.0615471713244915, + -0.028720451518893242, + -0.1060466468334198, + 0.022067103534936905, + -0.1124013289809227, + -0.04936547204852104, + 0.009042881429195404, + 0.03821277245879173, + 0.010275226086378098, + -0.026160305365920067, + 0.05564216524362564, + 0.06134744733572006, + -0.03146400675177574, + 0.04470425471663475, + -0.09301458299160004, + -0.0458662211894989, + -0.02908039093017578, + -0.053536366671323776, + 0.02112414501607418, + 0.01378999650478363, + 0.016992278397083282, + 0.04274429753422737, + -0.0493905283510685, + -0.014555824920535088, + 0.03152507171034813, + 0.02878335677087307, + -0.014487138018012047, + -0.02666473761200905, + -0.0914408415555954, + -0.0541544072329998, + 1.584157349608556e-33, + 0.020983515307307243, + 0.05347197875380516, + -0.06661040335893631, + 0.0009038225398398936, + 0.031801026314496994, + 0.023325107991695404, + -0.03610644489526749, + 0.029515573754906654, + 0.17615236341953278, + -0.06718979775905609, + -0.025467703118920326, + -0.08531614392995834, + 0.04049639031291008, + -0.01805424690246582, + -0.04524073749780655, + -0.04280829802155495, + 0.10139468312263489, + 
0.029113303869962692, + 0.060669124126434326, + 0.005392046179622412, + -0.06589948385953903, + 0.007843609899282455, + 0.018318859860301018, + -0.06844128668308258, + 0.06228259950876236, + 0.0067151812836527824, + 0.15284965932369232, + -0.020420772954821587, + -0.15761908888816833, + 0.007363060023635626, + 0.03968852385878563, + -0.01750367321074009, + -0.0853145644068718, + 0.018123609945178032, + -0.016327766701579094, + 0.009068207815289497, + -0.0558089055120945, + 0.09528887271881104, + 0.04409673064947128, + 0.03809195011854172, + 0.034919656813144684, + -0.07533685117959976, + 0.004587229341268539, + 0.06575445830821991, + 0.05714786425232887, + -0.027943694964051247, + -0.12656234204769135, + -0.035893142223358154, + 0.03594524785876274, + 0.03932963311672211, + -0.08188361674547195, + -0.004420648794621229, + 0.011425483040511608, + 0.056083936244249344, + -0.008442560210824013, + -0.04040393605828285, + -0.026302991434931755, + -0.05984058603644371, + 0.04202187433838844, + -0.0004535859916359186, + 0.01906391978263855, + 0.0344066359102726, + -0.056275367736816406, + -0.05575418099761009, + 0.0031951202545315027, + -0.03663144260644913, + -0.05380155146121979, + -0.02139953337609768, + -0.002407937543466687, + -0.02029218152165413, + -0.0038107922300696373, + -0.032218847423791885, + -0.06176019832491875, + 0.12360663712024689, + 0.008043625392019749, + 0.03380918130278587, + -0.011053069494664669, + -0.05986086279153824, + 0.034162502735853195, + -0.0472533293068409, + -0.04168698191642761, + -0.04694301262497902, + -0.0260507483035326, + -0.02609090879559517, + 0.04234694316983223, + -0.041649796068668365, + 0.006260088179260492, + 0.005315860267728567, + 0.03272666037082672, + 0.047357670962810516, + 0.08116107434034348, + 0.02483230084180832, + 0.056733813136816025, + -0.008381973952054977, + -0.012605291791260242, + -2.6700323729755837e-08, + 0.01736389845609665, + 0.020590709522366524, + -0.08382880687713623, + -0.049745891243219376, + 
0.06646019220352173, + -0.05172014981508255, + 0.03654057905077934, + -0.09228933602571487, + 0.03999175503849983, + -0.02021024562418461, + -0.04262700304389, + 0.05382781848311424, + 0.004780337680131197, + -0.0806218832731247, + 0.012840602546930313, + 0.0038513035979121923, + 0.044516872614622116, + -0.028158852830529213, + -0.0896618515253067, + -0.013239125721156597, + 0.04167158529162407, + 0.033077601343393326, + 0.017056217417120934, + -0.1276015192270279, + -0.04771357402205467, + 0.02050425298511982, + -0.03143446892499924, + -0.058038435876369476, + -0.057369034737348557, + 0.053105469793081284, + -0.02247268334031105, + 0.05122159421443939, + 0.05061115697026253, + -0.0592089481651783, + -0.07698394358158112, + 0.11378926783800125, + 0.01153957936912775, + 0.022280460223555565, + 0.013087255880236626, + 0.004714030306786299, + 0.07125484943389893, + -0.04531160742044449, + -0.029442181810736656, + -0.028510523959994316, + 0.022433869540691376, + -0.022116634994745255, + 8.011646423256025e-05, + -0.08019818365573883, + 0.011157535947859287, + 0.02311084233224392, + -0.034853093326091766, + 0.019541827961802483, + 0.049631621688604355, + 0.008129246532917023, + -0.021480318158864975, + -0.0024885504972189665, + 0.014285089448094368, + -0.018570106476545334, + 0.08718406409025192, + 0.01726202294230461, + -0.034702301025390625, + -0.014993829652667046, + 0.00255575287155807, + -0.04470689967274666 + ] + }, + { + "type": "NarrativeText", + "element_id": "e37f7a0354c187111dbbd76c353a8548", + "text": "\"If you have nothing better to do, Count (or Prince), and if the prospect of spending an evening with a poor invalid is not too terrible, I shall be very charmed to see you tonight between 7 and 10--Annette Scherer.\"", + "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/book-war-and-peace-1p.txt" + }, + "permissions_data": [ + { + 
"mode": 33188 + } + ] + } + }, + "embeddings": [ + -0.0486200787127018, + 0.03687998652458191, + 0.08913004398345947, + 0.02740296721458435, + -0.010071247816085815, + 0.10595864802598953, + 0.06342905014753342, + -0.006530109327286482, + -0.013889482244849205, + -0.08802182227373123, + -0.07767900824546814, + 0.03545078635215759, + -0.028766684234142303, + -0.07743863016366959, + -0.04468623921275139, + -0.0037754878867417574, + 0.03962993994355202, + 0.009885626845061779, + 0.019793041050434113, + 0.021323993802070618, + -0.002675591967999935, + -0.024544131010770798, + 0.0056113870814442635, + 0.0578371025621891, + -0.02803550288081169, + -0.038070160895586014, + -0.00041047457489185035, + -0.04367060959339142, + -0.05767305567860603, + 0.06916482746601105, + -0.019221553578972816, + 0.0378342904150486, + -0.052809860557317734, + 0.04571171849966049, + 0.028409209102392197, + -0.008392543531954288, + -0.03478813171386719, + 0.009684890508651733, + 0.04222659021615982, + 0.06265994161367416, + 0.046711452305316925, + -0.07935523986816406, + -0.056779175996780396, + 0.006064460147172213, + -0.07046529650688171, + -0.05251942202448845, + 0.034053314477205276, + -0.040065519511699677, + 0.026450546458363533, + -0.01455377135425806, + -0.08697941899299622, + 0.030640551820397377, + -0.0813479945063591, + -0.11330573260784149, + 0.03267942741513252, + 0.07518131285905838, + 0.0211882833391428, + -0.0101212402805686, + 0.07134700566530228, + 0.03555462136864662, + 0.015815099701285362, + 0.012798494659364223, + 0.07203026115894318, + 0.05252337455749512, + -0.013321940787136555, + 0.019010035321116447, + -0.04423129931092262, + 0.0501399002969265, + -0.13513483107089996, + 0.08778797835111618, + -0.06448494642972946, + 0.002710366854444146, + -0.006125647574663162, + -0.009832946583628654, + -0.08841821551322937, + -0.033734314143657684, + -0.029915349557995796, + -0.07407321035861969, + 0.012009759433567524, + 0.015418943017721176, + -0.08306631445884705, + 
-0.10689456760883331, + -0.06608614325523376, + -0.03254549577832222, + 0.017948448657989502, + -0.03686050698161125, + 0.07100754231214523, + -0.06899391114711761, + 0.022355591878294945, + 0.010308916680514812, + -0.02578306384384632, + 0.0421190969645977, + -0.03190562501549721, + 0.04253567010164261, + -0.02041158266365528, + 0.03507411479949951, + 0.02260320633649826, + -0.02040167711675167, + -0.07323514670133591, + 0.11387626081705093, + 0.008910990320146084, + 0.08846545964479446, + 0.017315223813056946, + 0.03806573152542114, + -0.029453329741954803, + -0.015634024515748024, + 0.009224730543792248, + -0.01623440906405449, + -0.016330881044268608, + -0.04198376461863518, + 0.008446392603218555, + 0.0075980001129209995, + 0.10469182580709457, + -0.03243805095553398, + 0.026653125882148743, + 0.004351378884166479, + 0.06812883168458939, + -0.030507123097777367, + 0.06144857779145241, + 0.010282279923558235, + 0.07976135611534119, + 0.004356532823294401, + 0.036928337067365646, + 0.06752532720565796, + -0.05194714292883873, + -0.020909544080495834, + 0.060606107115745544, + -3.4575350282501063e-34, + 0.06664283573627472, + -0.013305830769240856, + 0.1437346488237381, + 0.05551281198859215, + 0.02938142418861389, + -0.022709250450134277, + -0.03923968970775604, + 0.008861655369400978, + 0.044513039290905, + -0.01267368346452713, + 0.006724163889884949, + -0.10200199484825134, + 0.015488700941205025, + -0.10154026746749878, + 0.005830892361700535, + 0.053493745625019073, + -0.030349310487508774, + -0.024011699482798576, + 0.08674285560846329, + -0.02435682900249958, + -0.02954505756497383, + -0.08889515697956085, + -0.044003356248140335, + -0.018109647557139397, + -0.10312677174806595, + -0.04186996817588806, + 0.04156767576932907, + -0.01929759792983532, + 0.06481113284826279, + 0.009118330664932728, + -0.0038376804441213608, + 0.0536307767033577, + 0.06863446533679962, + 0.00452498160302639, + 0.008223271928727627, + -0.060197994112968445, + 
-0.06310275197029114, + -0.01502180378884077, + -0.013857019133865833, + -0.02473541907966137, + -0.025698484852910042, + -0.0315413698554039, + 0.12786005437374115, + 0.001188497873954475, + -0.007573425304144621, + 0.043278735131025314, + 0.01414393074810505, + 0.04244668036699295, + 0.00396665558218956, + -0.006093735806643963, + -0.030611569061875343, + -0.03642860800027847, + 0.07054053246974945, + 0.042917750775814056, + 0.010188819840550423, + -0.0537363737821579, + -0.039507195353507996, + 0.005340895615518093, + -0.022317033261060715, + -0.05836333706974983, + 0.06633792072534561, + -0.08509992063045502, + 0.025149282068014145, + -0.06464406847953796, + 0.056099049746990204, + 0.016795488074421883, + -0.0030728343408554792, + -0.02426709048449993, + -0.00023277585569303483, + -0.056554801762104034, + -0.002543054288253188, + 0.0791889950633049, + -0.05696767568588257, + -0.007267802953720093, + -0.024808255955576897, + 0.040404945611953735, + 0.07221050560474396, + 0.002430019434541464, + 0.015402681194245815, + -0.0687580555677414, + 0.06608901917934418, + 0.0035699931904673576, + -0.06248387321829796, + 0.05324600264430046, + 0.09190323203802109, + 0.0009737858781591058, + -0.0031378555577248335, + -0.12396568059921265, + -0.014093366451561451, + 0.09479108452796936, + 0.025567205622792244, + -0.06639507412910461, + -0.025462470948696136, + -0.05062700808048248, + -0.1386348009109497, + -3.652968770091485e-34, + -0.01269698515534401, + 0.008826198987662792, + 0.06109125539660454, + 0.10967045277357101, + 0.02726907655596733, + -0.04344628378748894, + -0.005868025589734316, + 0.023258009925484657, + 0.05267712101340294, + -0.030711913481354713, + -0.05221112444996834, + -0.03368792682886124, + 0.13911475241184235, + -0.05078120902180672, + 0.015582331456243992, + -0.033206500113010406, + 0.018242647871375084, + -0.07240897417068481, + -0.01647120527923107, + -0.00954617839306593, + 0.04724113643169403, + 0.016156530007719994, + -0.030115395784378052, + 
-0.030002685263752937, + -0.0002849442244041711, + 0.06315215677022934, + 0.01843206211924553, + -0.0102812135592103, + -0.0846731886267662, + -0.025300929322838783, + 0.12236669659614563, + 0.004572718869894743, + -0.05839154124259949, + 0.08561071753501892, + -0.024420227855443954, + 0.04175037518143654, + 0.06704739481210709, + -0.0779973715543747, + -0.08636748045682907, + -0.00674787349998951, + -0.04664289578795433, + -0.046093594282865524, + -0.04156400263309479, + -0.0014965439913794398, + -0.001259968732483685, + -0.0121543537825346, + -0.011674032546579838, + 0.02481495961546898, + 0.0067732203751802444, + 0.029428301379084587, + -0.017223220318555832, + 0.02029380388557911, + -0.12813477218151093, + 0.11289172619581223, + -0.03552662581205368, + 0.003350534476339817, + 0.017396023496985435, + -0.001771645969711244, + 0.07063546776771545, + 0.015264755114912987, + -0.07010585069656372, + 0.021178683266043663, + -0.05190861225128174, + 0.017682252451777458, + 0.01634710095822811, + -0.05661784112453461, + -0.011006166227161884, + 0.08239301294088364, + -0.017991825938224792, + -0.002860717009752989, + 0.03403080627322197, + -0.03447557985782623, + -0.07497280836105347, + 0.017877522855997086, + 0.02490200474858284, + 0.0011251169489696622, + 0.04814967140555382, + -0.011577640660107136, + -0.05788547918200493, + 0.03651382401585579, + -0.014192771166563034, + -0.04319233447313309, + -0.026730641722679138, + -0.011179517023265362, + 0.006381201557815075, + -0.01330598071217537, + 0.0729939341545105, + -0.02967524155974388, + -0.019634032621979713, + 0.026102008298039436, + 0.032379671931266785, + -0.0049930959939956665, + 0.0515344999730587, + -0.08542999625205994, + 0.01472156960517168, + -3.33895613380264e-08, + 0.044885821640491486, + -0.04714803770184517, + -0.0640324279665947, + -0.13860778510570526, + 0.03632424771785736, + -0.03801967576146126, + 0.024571441113948822, + 0.008783490397036076, + -0.07595615088939667, + 0.02360796183347702, + 
0.10317737609148026, + -0.0855007916688919, + 0.06889460980892181, + -0.00648866081610322, + 0.08344215899705887, + 0.030783556401729584, + 0.0793389081954956, + -0.05401010811328888, + -0.022299574688076973, + -0.012930418364703655, + 0.06527602672576904, + 0.0033282111398875713, + 0.062170665711164474, + -0.0289220679551363, + -0.008504523895680904, + 0.04795003682374954, + 0.021415913477540016, + 0.0014492444461211562, + -0.04957083612680435, + 0.027479158714413643, + 0.09821036458015442, + 0.03126221150159836, + 0.017651252448558807, + -0.0004344964399933815, + -0.011527231894433498, + 0.04417389631271362, + -0.03349463269114494, + -0.03799552097916603, + 0.021515104919672012, + -0.023998333141207695, + -0.04135696962475777, + 0.03809981048107147, + -0.04286262392997742, + 0.0785767138004303, + 0.03905704990029335, + -0.0793391615152359, + 0.013255833648145199, + 0.017900139093399048, + -0.039659105241298676, + -0.04010462760925293, + -0.0003518355661071837, + -0.0019700382836163044, + 0.020311318337917328, + -0.023235665634274483, + -0.04101383686065674, + -0.04207604005932808, + 0.03147193044424057, + 0.05311310291290283, + -0.06019069626927376, + 0.006541392765939236, + 0.14579349756240845, + 0.08224166184663773, + -0.07540097832679749, + -0.07972783595323563 + ] + }, + { + "type": "NarrativeText", + "element_id": "ff53e9e3e2c0a40c878f3117dfaee12e", + "text": "\"Heavens! what a virulent attack!\" replied the prince, not in the least disconcerted by this reception. He had just entered, wearing an embroidered court uniform, knee breeches, and shoes, and had stars on his breast and a serene expression on his flat face. He spoke in that refined French in which our grandfathers not only spoke but thought, and with the gentle, patronizing intonation natural to a man of importance who had grown old in society and at court. 
He went up to Anna Pavlovna, kissed her hand, presenting to her his bald, scented, and shining head, and complacently seated himself on the sofa.", + "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/book-war-and-peace-1p.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + }, + "embeddings": [ + -0.01821092516183853, + 0.14189432561397552, + 0.06296979635953903, + 0.02143603190779686, + -0.034299086779356, + 0.0876535102725029, + 0.09852343052625656, + -0.04676198586821556, + -0.032796524465084076, + -0.057929232716560364, + -0.04649098217487335, + -0.023917673155665398, + 0.004826286341995001, + -0.06528618186712265, + 0.013964714482426643, + -0.00030670486739836633, + -0.017573034390807152, + 0.040330834686756134, + 0.0012798127718269825, + 0.11687064170837402, + -0.027677394449710846, + 0.060690656304359436, + 0.004045288078486919, + 0.0030521771404892206, + -0.04602682217955589, + 0.009411393664777279, + 0.028842858970165253, + -0.029541144147515297, + -0.014768578112125397, + 0.018804389983415604, + -0.018149763345718384, + -0.0018351302715018392, + -0.005772416014224291, + 0.0782475695014, + -0.09304409474134445, + -0.003969996236264706, + -0.025508679449558258, + -0.0327136293053627, + 0.027754027396440506, + -0.00766820227727294, + -0.012167279608547688, + -0.042259518057107925, + -0.022764062508940697, + 0.07541149109601974, + 0.03192087635397911, + -0.0191128421574831, + -0.0809827595949173, + -0.024734744802117348, + 0.04502084106206894, + -0.01547537837177515, + -0.09272019565105438, + 0.03354223817586899, + 0.03152655065059662, + -0.07030364125967026, + -0.04977883771061897, + 0.02008303999900818, + 0.03791828453540802, + 0.0015579544706270099, + 0.040449850261211395, + 0.07562998682260513, + 0.0017830337164923549, + 0.05042259395122528, + 0.10320527851581573, + 0.0470566526055336, + -0.04768459126353264, + 
-0.10446310043334961, + 0.05923541262745857, + 0.020012550055980682, + -0.06190073862671852, + 0.1021382287144661, + 0.04225445166230202, + -0.006701563484966755, + -0.012615523301064968, + -0.11128805577754974, + -0.04897299036383629, + -0.011014267802238464, + 0.006252156104892492, + -0.123118095099926, + -0.011414051987230778, + 0.015304232016205788, + -0.004650868941098452, + -0.06636469811201096, + -0.07129022479057312, + 0.06300666928291321, + -0.04842931032180786, + -0.09559126198291779, + 0.04948113486170769, + -0.08089607208967209, + -0.04187162220478058, + 0.025684669613838196, + 0.004877245984971523, + -0.021023446694016457, + -0.0633840411901474, + 0.04149612784385681, + 0.02692948468029499, + 0.03656553104519844, + 0.01544942706823349, + -0.04899546876549721, + -0.06652054190635681, + 0.05073947831988335, + 0.051620397716760635, + 0.0516577810049057, + -0.025756951421499252, + 0.027645181864500046, + -0.09982189536094666, + 0.023715786635875702, + -0.02607894502580166, + -0.08350104838609695, + -0.05890771746635437, + -0.010147854685783386, + -0.0343785397708416, + -0.11381182074546814, + 0.011026830412447453, + -0.11859532445669174, + -0.022935010492801666, + 0.047872237861156464, + 0.05839494615793228, + -0.038273610174655914, + -0.009550919756293297, + -0.014361999928951263, + 0.06346395611763, + 0.024537542834877968, + -0.033443935215473175, + 0.11250730603933334, + -0.06982149183750153, + -0.035364434123039246, + 0.004629208706319332, + 1.7470009697508675e-33, + 0.03947211802005768, + 0.04033314436674118, + 0.050032228231430054, + 0.0627869963645935, + 0.049182530492544174, + 0.037862639874219894, + -0.0022653292398899794, + -0.0003670032892841846, + -0.03914610669016838, + -0.03560233116149902, + -0.014580338262021542, + -0.031028801575303078, + 0.0023757971357554197, + -0.028890565037727356, + -0.10088590532541275, + 0.11166083812713623, + -0.06596997380256653, + -0.009850457310676575, + 0.06851842999458313, + 0.02236993797123432, + 
0.007712032645940781, + 0.07309041172266006, + -0.02976348251104355, + 0.07449772208929062, + -0.04152586683630943, + 0.023668071255087852, + 0.040782857686281204, + -0.007492564618587494, + -0.02654392085969448, + 0.010241925716400146, + -0.049055520445108414, + 0.015456810593605042, + 0.05478426069021225, + 0.00742486584931612, + 0.02551720105111599, + -0.008687573485076427, + -0.04240753874182701, + -0.0480441115796566, + -0.07281585782766342, + 0.03303362801671028, + 0.003399749519303441, + -0.002322735032066703, + -0.007235710974782705, + -0.012058865278959274, + -0.1584135890007019, + 0.046748436987400055, + -0.05691584572196007, + 0.0021024185698479414, + -0.004185501020401716, + -0.07694175094366074, + -0.021312419325113297, + 0.007585803512483835, + -0.006906776688992977, + 0.03139781951904297, + -0.023958295583724976, + -0.01615755446255207, + 0.004572177771478891, + 0.04501872882246971, + 0.06532134860754013, + -0.03778345510363579, + 0.10829175263643265, + -0.03116547502577305, + 0.06857898831367493, + -0.0019906125962734222, + -0.020024165511131287, + -0.12234478443861008, + -0.0775686502456665, + 0.0522063784301281, + -0.0482671856880188, + -0.006218526046723127, + -0.03172216936945915, + 0.10647479444742203, + -0.004311352502554655, + -0.020344866439700127, + -0.028779391199350357, + 0.010282316245138645, + -0.08016344159841537, + 0.04328654333949089, + -0.0010577351786196232, + -0.06618639081716537, + 0.03898295387625694, + 0.0017452107276767492, + 0.022284498438239098, + 0.02565678395330906, + 0.01639074645936489, + -0.06552040576934814, + -0.034404560923576355, + -0.05710839852690697, + -0.03641461208462715, + 0.13124780356884003, + -0.015124828554689884, + 0.04237808659672737, + 0.07488367706537247, + -0.10918772220611572, + -0.0890873447060585, + -5.8269750881940045e-33, + -0.020827779546380043, + -0.0030878253746777773, + -0.08207237720489502, + 0.09733552485704422, + -0.008875328116118908, + -0.04047126695513725, + -0.07314589619636536, + 
0.07006905972957611, + -0.0044971248134970665, + -0.013727357611060143, + 0.0337195061147213, + -0.0396072082221508, + 0.09562670439481735, + -0.08862382918596268, + -0.0176002886146307, + -0.03419343754649162, + 0.10651561617851257, + 0.05324796214699745, + -0.031437311321496964, + 0.06551411747932434, + 0.06156589835882187, + -0.03881298005580902, + 0.00987060647457838, + -0.028915315866470337, + -0.011992313899099827, + 0.009035706520080566, + 0.0987909585237503, + -0.007158723659813404, + -0.11763091385364532, + 0.034273210912942886, + 0.03440478444099426, + 0.011127408593893051, + -0.057399339973926544, + 0.060642633587121964, + 0.0061752330511808395, + 0.09144444018602371, + 0.04691409692168236, + -0.033194947987794876, + 0.010019433684647083, + -0.007008041720837355, + -0.003974898252636194, + -0.05783271789550781, + 0.01834559254348278, + 0.06880045682191849, + 0.06445209681987762, + -0.017780110239982605, + -0.02187952771782875, + 0.03970014676451683, + 0.05623394250869751, + -0.02333047427237034, + -0.04692262411117554, + 0.01507568173110485, + -0.037322040647268295, + 0.013420004397630692, + -0.030217880383133888, + -0.026605617254972458, + 0.0004773407708853483, + -0.04241754114627838, + 0.08805423974990845, + 0.006248042918741703, + -0.03223811462521553, + -0.023605985566973686, + -0.07872776687145233, + -0.05528227612376213, + -0.026779508218169212, + 0.07355593144893646, + 0.015953045338392258, + 0.04494727402925491, + 0.07241591811180115, + -0.040916766971349716, + 0.04634455591440201, + 0.014152943156659603, + -0.0031511590350419283, + 0.08645723760128021, + 0.0026694193948060274, + 0.045034635812044144, + 0.04822566732764244, + -0.07435242086648941, + 0.004517827648669481, + -0.021706685423851013, + -0.0002275058359373361, + -0.08234336972236633, + -0.03115137293934822, + -0.05293334275484085, + 0.062102388590574265, + -0.0447835698723793, + 0.0011613434180617332, + -0.013380882330238819, + -0.08765745162963867, + -0.01565646380186081, + 
-0.024476416409015656, + 0.006487371399998665, + -0.01591992937028408, + -0.09806496649980545, + 0.02603817731142044, + -5.756267640322221e-08, + -0.04758313298225403, + -0.043284494429826736, + -0.05637137591838837, + -0.057699792087078094, + -6.816215318394825e-05, + -0.004204032476991415, + -0.004778987728059292, + -0.0270430576056242, + 0.013572372496128082, + 0.006340988911688328, + -0.0012254661414772272, + -0.005992516875267029, + 0.04482017457485199, + -0.024133719503879547, + 0.07855549454689026, + 0.008333049714565277, + -0.02224978804588318, + -0.011054149828851223, + -0.049433838576078415, + 0.01750880666077137, + 0.03386346995830536, + 0.027341945096850395, + -0.011000368744134903, + -0.056646328419446945, + -0.03483385220170021, + 0.005601553712040186, + 0.017894670367240906, + -0.03889445960521698, + -0.03091748058795929, + -0.04818898066878319, + 0.042932868003845215, + 0.02030121721327305, + -0.10848958045244217, + -0.05014633387327194, + -0.05427028238773346, + 0.0692223310470581, + 0.08041664958000183, + -0.07030846178531647, + 0.16752898693084717, + -0.07572438567876816, + -0.029638761654496193, + -0.007868473418056965, + -0.06977296620607376, + 0.006647302769124508, + 0.033394474536180496, + -0.007479218766093254, + 0.030018849298357964, + 0.004089188762009144, + 0.012770390138030052, + 0.05324980616569519, + 0.00396936759352684, + 0.02976180613040924, + 0.010942192748188972, + 0.04780486971139908, + 0.006235879380255938, + -0.03354155272245407, + 0.06135937198996544, + 0.04686572030186653, + -0.05125610530376434, + 0.03666787967085838, + 0.06145469471812248, + 0.0174971092492342, + -0.027414878830313683, + -0.01738799922168255 + ] + }, + { + "type": "NarrativeText", + "element_id": "944f331f0e9f276612e232b36f28b5f6", + "text": "\"First of all, dear friend, tell me how you are. 
Set your friend's mind at rest,\" said he without altering his tone, beneath the politeness and affected sympathy of which indifference and even irony could be discerned.", + "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/book-war-and-peace-1p.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + }, + "embeddings": [ + 0.0160987488925457, + 0.12206719815731049, + 0.10550276935100555, + -0.003016919130459428, + -0.009118695743381977, + -0.02610720507800579, + 0.11645600944757462, + 0.0013343183090910316, + -0.06382511556148529, + -0.07452850043773651, + -0.06803739070892334, + 0.059326350688934326, + -0.021553203463554382, + -0.014978380873799324, + -0.0011992462677881122, + 0.04087411239743233, + 0.02088122069835663, + -0.07886743545532227, + -0.08999329805374146, + 0.07935325801372528, + 0.05901940166950226, + 0.018977971747517586, + 0.0910627618432045, + -0.05023808404803276, + 0.0005669437814503908, + -0.04943303391337395, + 0.06656934320926666, + 0.007086663041263819, + -0.02067398652434349, + 0.03505444526672363, + -0.0467815026640892, + -0.00972852110862732, + 0.00017310064868070185, + 0.0620654821395874, + -0.06824933737516403, + 0.05402690917253494, + 0.0423208624124527, + 0.01711539924144745, + 0.029098892584443092, + -0.046280041337013245, + -0.042063526809215546, + -0.028023062273859978, + -0.04234214127063751, + 0.02082713134586811, + 0.02718690037727356, + -0.000898190715815872, + -0.0022007173392921686, + 0.0066387588158249855, + -0.001233826857060194, + -0.091705821454525, + -0.10048782080411911, + 0.011058936826884747, + -0.1791040599346161, + -0.02567821927368641, + 0.018635226413607597, + 0.10850800573825836, + 0.04776740446686745, + 0.07271455228328705, + -0.02616521529853344, + -0.02535526268184185, + -0.08750185370445251, + -0.13322927057743073, + 0.05995911359786987, + 0.0165922399610281, + 
0.0417923778295517, + 0.06866782158613205, + 0.0037902214098721743, + 0.007516564801335335, + -0.12451987713575363, + 0.043020378798246384, + -0.008502606302499771, + 0.007917962037026882, + -0.01735038124024868, + 0.008838622830808163, + -0.06485219299793243, + -0.06691090017557144, + 0.03426593169569969, + 0.02400810457766056, + 0.05266360938549042, + 0.04014585539698601, + -0.04925774037837982, + 0.04040862247347832, + -0.03167679160833359, + 0.04269404336810112, + -0.038647040724754333, + -0.11679279059171677, + 0.04526420682668686, + -0.11612903326749802, + 0.011789215728640556, + 0.048795875161886215, + -0.09023935347795486, + 0.01673690602183342, + -0.02596202865242958, + 0.0006217050831764936, + -0.0374206006526947, + 0.06419851630926132, + -0.06578665971755981, + 0.003506947075948119, + -0.14764675498008728, + 0.047016941010951996, + 0.16012611985206604, + 0.05403438210487366, + -0.0016634081257507205, + 0.009381191805005074, + 0.007924630306661129, + 0.017867764458060265, + -0.050792064517736435, + -0.029873419553041458, + -0.12929809093475342, + -0.015970319509506226, + -0.00815843790769577, + -0.04533935710787773, + -0.016188418492674828, + -0.08447936177253723, + 0.04993674159049988, + 0.036467842757701874, + 0.011774512007832527, + 0.032003555446863174, + -0.023724256083369255, + -0.000642789585981518, + -0.018292110413312912, + -0.016456248238682747, + -0.022234966978430748, + 0.11142498254776001, + -0.011918253265321255, + -0.06907559931278229, + 0.021046197041869164, + 8.309481403886439e-34, + 0.04458508640527725, + 0.012564144097268581, + 0.007249556481838226, + -0.006804576609283686, + -0.0062193358317017555, + -0.010936948470771313, + -0.021314941346645355, + 0.011712024919688702, + -0.007926397025585175, + -0.02130422182381153, + 0.02477291040122509, + -0.034009698778390884, + 0.09787911921739578, + 0.02226019836962223, + -0.10160926729440689, + 0.01386685948818922, + -0.012803786434233189, + 0.007577549200505018, + 0.06363653391599655, + 
-0.030455658212304115, + 0.030187422409653664, + 0.043164800852537155, + -0.01825382001698017, + -0.06823723763227463, + -0.05853891000151634, + -0.0786886140704155, + 0.02740161307156086, + -0.007098027970641851, + 0.06252353638410568, + 0.03046920709311962, + -0.01957341842353344, + 0.03377342224121094, + -0.011660708114504814, + 0.010068544186651707, + 0.03804582729935646, + 0.029149286448955536, + -0.027130253612995148, + 0.04569420963525772, + -0.019928008317947388, + 0.005420615430921316, + 0.053901076316833496, + 0.08124633878469467, + -0.012646418064832687, + -0.04256311431527138, + -0.06275376677513123, + 0.07635440677404404, + -0.032438457012176514, + 0.0056598372757434845, + -0.011065983213484287, + -0.02847384661436081, + 0.0006818020483478904, + 0.0010741211008280516, + 0.07769519090652466, + 0.05876917019486427, + -0.013427291996777058, + 0.01110050082206726, + 0.04551602900028229, + -0.02639813907444477, + 0.02568582445383072, + -0.09366093575954437, + 0.05352219566702843, + -0.048836588859558105, + -0.008104460313916206, + -0.010864963755011559, + -0.013614559546113014, + 0.004153646994382143, + -0.07175567746162415, + -0.08854752033948898, + -0.03500457853078842, + -0.05254848673939705, + -0.02761487476527691, + 0.08625136315822601, + -0.03546270355582237, + -0.028873346745967865, + -0.10159868746995926, + -0.028719620779156685, + -0.034929852932691574, + -0.017042865976691246, + 0.07445263117551804, + -0.0438823327422142, + 0.05320775881409645, + 0.04853731766343117, + -0.12567442655563354, + -0.009663442149758339, + -0.045269858092069626, + -0.035887815058231354, + 0.017629269510507584, + -0.005348446778953075, + -0.01074414886534214, + 0.05340661481022835, + -0.029768258333206177, + 0.004949328489601612, + 0.012169799767434597, + -0.01557342428714037, + -0.09747887402772903, + -3.104125728164507e-33, + 0.019451318308711052, + 0.009628890082240105, + -0.027822716161608696, + 0.09368891268968582, + -0.0359320268034935, + 0.010453319177031517, + 
-0.028667831793427467, + 0.022156616672873497, + 0.06766413897275925, + 0.04893947392702103, + -0.0015285778790712357, + -0.04352250695228577, + 0.1032257154583931, + -0.08220577985048294, + -0.04291156306862831, + 0.014294409193098545, + 0.014013644307851791, + -0.03485838696360588, + -0.013450801372528076, + -0.002059179823845625, + 0.017987051978707314, + -0.006127211730927229, + 0.07154533267021179, + 0.04335157200694084, + -0.014778283424675465, + 0.021157339215278625, + 0.06065082177519798, + -0.04877574369311333, + -0.05862700194120407, + -0.08890951424837112, + 0.09608693420886993, + 0.000989934429526329, + -0.0330541618168354, + 0.019966650754213333, + 0.0017701018368825316, + 0.016169700771570206, + -0.047474440187215805, + 0.02289857342839241, + -0.07072478532791138, + 0.0047415923327207565, + 0.03560173138976097, + 0.0012952637625858188, + -0.03315557539463043, + 0.008455480448901653, + -3.915580236935057e-06, + -0.020027857273817062, + 0.02139333076775074, + -0.10040562599897385, + -0.014806630089879036, + -0.037951335310935974, + -0.01407585944980383, + -0.06112402305006981, + 0.06571066379547119, + 0.04188748821616173, + 0.023539962247014046, + -0.05987746641039848, + 0.008518475107848644, + 0.02684272639453411, + 0.041387781500816345, + -0.023006778210401535, + 0.002272665733471513, + -0.06703060120344162, + 0.008017444051802158, + 0.017659533768892288, + 0.02384805493056774, + 0.043777212500572205, + 0.0014354323502629995, + -0.030951501801609993, + 0.05077676475048065, + 0.0413103811442852, + 0.023455707356333733, + 0.044847529381513596, + -0.012993748299777508, + -0.07765237241983414, + 0.06419996172189713, + 0.07914780080318451, + -0.010106937028467655, + -0.04605785757303238, + -0.08842470496892929, + -0.0031189776491373777, + 0.02931838668882847, + -0.06857184320688248, + 0.037887103855609894, + -0.03523596376180649, + -0.12513650953769684, + -0.05887523293495178, + -0.018760619685053825, + 0.028921933844685555, + -0.02774149179458618, + 
0.1011829748749733, + 0.02542969211935997, + 0.02033134363591671, + 0.02338927425444126, + -0.03361344709992409, + 0.011279008351266384, + -4.0931755762585453e-08, + 0.007863683626055717, + -0.13112497329711914, + -0.047018375247716904, + 0.040921878069639206, + -0.007581941317766905, + 0.07654201984405518, + 0.06498793512582779, + 0.030623607337474823, + -0.0821574479341507, + 0.013813508674502373, + 0.021101392805576324, + 0.06278448551893234, + 0.009766457602381706, + 0.031048975884914398, + 0.01676850952208042, + 0.013764684088528156, + 0.04950806871056557, + -0.0184162687510252, + 0.016119951382279396, + 0.03716488182544708, + 0.06645973771810532, + 0.025534167885780334, + 0.022840550169348717, + 0.00752737931907177, + 0.007892819121479988, + 0.006764095742255449, + 0.018308069556951523, + -0.06779985129833221, + -0.08241978287696838, + 0.041592009365558624, + 0.01858123205602169, + 0.04872805252671242, + 0.004406407941132784, + -0.03522343933582306, + -0.0010236463276669383, + -0.02906103804707527, + -0.0023408690467476845, + 0.03479830548167229, + 0.06409123539924622, + 0.048081208020448685, + 0.030854789540171623, + 0.05560770630836487, + -0.06750526279211044, + 0.02454545348882675, + 0.09970931708812714, + 0.005820556543767452, + 0.053024549037218094, + 0.007615693379193544, + -0.07209456712007523, + -0.012196579948067665, + 0.048657987266778946, + 0.047675877809524536, + -0.04307391867041588, + -0.04726945981383324, + 0.004431556910276413, + 0.03461168706417084, + -0.04459868744015694, + 0.015400650911033154, + -0.07706724107265472, + 0.012848843820393085, + 0.03197341039776802, + 0.1119411438703537, + -0.1051933765411377, + -0.10109532624483109 + ] + }, + { + "type": "NarrativeText", + "element_id": "9ff1d8f7e93d526d0e3a174a51850ec8", + "text": "\"Can one be well while suffering morally? Can one be calm in times like these if one has any feeling?\" said Anna Pavlovna. 
\"You are staying the whole evening, I hope?\"", + "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/book-war-and-peace-1p.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + }, + "embeddings": [ + -0.01610482670366764, + 0.051253389567136765, + -0.005827244371175766, + 0.0946529433131218, + -0.027738889679312706, + 0.04864746704697609, + 0.06249867379665375, + -0.054742150008678436, + 0.005165732465684414, + -0.09965430200099945, + -0.014380048029124737, + 0.043135855346918106, + -0.06380167603492737, + 0.016726622357964516, + 0.02624652348458767, + 0.08337194472551346, + -0.010945177637040615, + -0.014145465567708015, + 7.170392200350761e-05, + 0.1330530345439911, + -0.08622105419635773, + 0.017468294128775597, + 0.019615400582551956, + 0.03573539853096008, + -0.06584513932466507, + 0.023946993052959442, + 0.055600881576538086, + -0.005579668562859297, + 0.06655523180961609, + 0.01074758917093277, + -0.015564709901809692, + -0.0782647579908371, + 0.011902868747711182, + 0.04096626862883568, + 0.007362612523138523, + 0.10021872073411942, + -0.07527384907007217, + 0.013052872382104397, + 0.04178478196263313, + 0.008722580969333649, + 0.04301171377301216, + -0.0466691330075264, + -0.010680342093110085, + -0.0453094057738781, + 0.029875552281737328, + -0.07483332604169846, + -0.04754328355193138, + -0.037238407880067825, + 0.013817206025123596, + -0.11729475855827332, + -0.07827996462583542, + 0.0033670763950794935, + -0.126630038022995, + -0.04176899045705795, + 0.03585533797740936, + 0.04992491751909256, + 0.005937905982136726, + 0.037598952651023865, + -0.006769715342670679, + 0.058931466192007065, + 0.0066518220119178295, + 0.05724451318383217, + 0.06277144700288773, + 0.049325790256261826, + -0.0047280495055019855, + 0.049491412937641144, + 0.03973150625824928, + 0.010035641491413116, + -0.10073582828044891, + 
0.022169971838593483, + -0.0748724564909935, + -0.041447896510362625, + 0.02511797845363617, + -0.03404435142874718, + -0.05399053916335106, + -0.07167433202266693, + 0.07152591645717621, + -0.042239025235176086, + 0.019727692008018494, + 0.06591752916574478, + 0.030093586072325706, + -0.029379168525338173, + -0.030472930520772934, + -0.013006279245018959, + -0.07183653116226196, + -0.05795527249574661, + 0.06737776845693588, + -0.012468209490180016, + 0.08337192237377167, + -0.008741271682083607, + 0.03279908001422882, + 0.026616854593157768, + 0.029712697491049767, + 0.010602087713778019, + 0.06889163702726364, + 0.025316482409834862, + -0.10485817492008209, + 0.03166160359978676, + -0.048036493360996246, + 0.011014791205525398, + 0.030293148010969162, + 0.05089317634701729, + -0.04250964894890785, + -0.050751376897096634, + -0.01653212495148182, + 0.014843769371509552, + 0.018969597294926643, + -0.1258106380701065, + -0.017050139605998993, + 0.021225249394774437, + -0.11021719872951508, + -0.010988583788275719, + 0.03386014699935913, + -0.05214054882526398, + 0.031074276193976402, + 0.05395654961466789, + -0.018187034875154495, + -0.03948018699884415, + 0.035417187958955765, + -0.025734487920999527, + 0.05824298784136772, + -0.06790591776371002, + 0.018384521827101707, + 0.09200038015842438, + 0.009596297517418861, + 0.002920617815107107, + 0.0547582171857357, + 1.0132876098458316e-33, + 0.06008730083703995, + -0.09051956236362457, + 0.05675506219267845, + 0.004556921310722828, + -0.021692223846912384, + 0.02583886869251728, + 0.0043686325661838055, + -0.08353648334741592, + -0.01712646149098873, + -0.041716162115335464, + -0.017875559628009796, + -0.0905553549528122, + -0.022911299020051956, + -0.03050016611814499, + -0.08570839464664459, + 0.023714805021882057, + -0.05599593371152878, + -0.03581826016306877, + -0.014550252817571163, + 0.04697932302951813, + 0.035754334181547165, + 0.0009538281010463834, + -0.04018554091453552, + 0.025454744696617126, + 
-0.05843108147382736, + -0.06453957408666611, + 0.1042328029870987, + 0.013875424861907959, + -0.0054871151223778725, + 0.0010717582190409303, + -0.0623110793530941, + -0.006021424196660519, + -0.0301486998796463, + -0.04943401739001274, + 0.02952149696648121, + 0.10211923718452454, + -0.07292632758617401, + 0.13485057651996613, + -0.008966885507106781, + -0.00745486980304122, + -0.04633538797497749, + 0.055415838956832886, + 0.06077098473906517, + 0.0472797155380249, + -0.03842192143201828, + 0.012684573419392109, + -0.05148749053478241, + -0.053674254566431046, + -0.09879987686872482, + -0.01483957376331091, + -0.09974432736635208, + 0.028402220457792282, + 0.007323232013732195, + -0.04016852751374245, + -0.002353284740820527, + 0.029735125601291656, + 0.060454465448856354, + 0.0660121962428093, + 0.014262743294239044, + -0.07497283071279526, + -0.05893218517303467, + -0.0992627814412117, + -0.05170032009482384, + -0.08867470920085907, + -0.024467844516038895, + 0.014855517074465752, + 0.0004901870270259678, + -0.04947167634963989, + -0.06529591977596283, + -0.042360782623291016, + -0.00798081886023283, + 0.05371472239494324, + -0.029511747881770134, + -0.057543180882930756, + -0.1031012162566185, + -0.0026863943785429, + 0.018424520269036293, + 0.007044873666018248, + -0.02756519429385662, + -0.0645735040307045, + -0.01054662186652422, + -0.03435388207435608, + -0.01008880976587534, + 0.09299924224615097, + 0.10657934844493866, + -0.05647190287709236, + -0.05445384234189987, + -0.07525269687175751, + -0.07827100157737732, + 0.07795275747776031, + 0.03933519497513771, + 0.07512398809194565, + 0.12325005978345871, + -0.06564857065677643, + -0.07660938054323196, + -3.5192456806856436e-33, + 0.051001451909542084, + -0.001815524767152965, + -0.08879201114177704, + 0.1472892463207245, + 0.01003198605030775, + 0.050616104155778885, + -0.10411453992128372, + 0.011280640959739685, + -0.05086614936590195, + 0.05897092819213867, + 0.003801255952566862, + 
-0.02578495442867279, + 0.06590867787599564, + 0.03221657872200012, + -0.01250858511775732, + -0.022222450003027916, + 0.009842997416853905, + 0.029988722875714302, + -0.003391869831830263, + 0.009476670064032078, + -0.04153570905327797, + 0.0586935319006443, + -0.008409341797232628, + -0.008358301594853401, + 0.04683200269937515, + 0.06554033607244492, + 0.08408452570438385, + 0.012960245832800865, + -0.10739961266517639, + -0.047734543681144714, + 0.05281079560518265, + -0.028899669647216797, + -0.10605499893426895, + 0.01027202233672142, + 0.03618710860610008, + 0.05617087706923485, + 0.06084344536066055, + -0.09061265736818314, + -0.05428889021277428, + -0.012963457964360714, + 0.01669113151729107, + -0.016843315213918686, + -0.03512909635901451, + 0.0369216687977314, + -0.013455617241561413, + 0.015560198575258255, + 0.044837694615125656, + -0.038179781287908554, + -0.023380955681204796, + 0.03243395686149597, + -0.03296275436878204, + -0.028760431334376335, + -0.0438658781349659, + 0.030290324240922928, + 0.06805285811424255, + -0.06023217737674713, + -0.020878782495856285, + -0.03806651756167412, + 0.04697103798389435, + -0.05305474251508713, + -0.017459889873862267, + -0.02252122201025486, + -0.07903861999511719, + 0.02356264926493168, + -0.022720448672771454, + -0.021605391055345535, + 0.004828976467251778, + -0.03439774736762047, + -0.0342840738594532, + 0.012214802205562592, + -0.020091809332370758, + 0.03535933047533035, + -0.09349164366722107, + -0.014218185096979141, + -0.018606118857860565, + 0.045896489173173904, + -0.002939624711871147, + -0.06549128890037537, + 0.004408827982842922, + -0.05500965565443039, + -0.00021257322805467993, + -0.028352709487080574, + 0.057422466576099396, + -0.029054734855890274, + -0.06672269850969315, + -0.041449081152677536, + 0.036742787808179855, + 0.039104726165533066, + -0.022849788889288902, + 0.0744740441441536, + -0.010481459088623524, + 0.017465557903051376, + -0.007322149351239204, + -0.08603904396295547, + 
-0.029187927022576332, + -4.020765231871337e-08, + 0.041963640600442886, + -0.05520766228437424, + -0.08829090744256973, + -0.04435351490974426, + -0.02202286384999752, + 0.0010983736719936132, + 0.036981359124183655, + 0.01366259902715683, + -0.07109334319829941, + 0.11878078430891037, + 0.043454255908727646, + -0.02029673010110855, + 0.048003848642110825, + 0.05399675667285919, + 0.011598952114582062, + 0.02649385668337345, + 0.0697978064417839, + -0.0054353103041648865, + 0.007042796816676855, + 0.03126956894993782, + 0.06965871900320053, + 0.00803336687386036, + 0.034161657094955444, + 0.005839210003614426, + 0.01303977333009243, + 0.05242529511451721, + 0.03213013336062431, + 0.01230533141642809, + -0.0845501571893692, + -0.004462869372218847, + 0.06343656033277512, + 0.0793367326259613, + -0.08516857028007507, + -0.017162848263978958, + -0.004258507862687111, + -0.0640791654586792, + 0.07798890024423599, + -0.025454316288232803, + 0.016501422971487045, + 0.026065481826663017, + 0.010165633633732796, + 0.06678293645381927, + -0.027573484927415848, + 0.011442654766142368, + 0.042290184646844864, + -0.019121816381812096, + 0.041821062564849854, + -0.04179904982447624, + 0.010216153226792812, + 0.046220190823078156, + -0.01021085400134325, + 0.006616220343858004, + 0.048986583948135376, + 0.0017409471329301596, + 0.0059212371706962585, + -0.01859423890709877, + 0.05280453339219093, + 0.053373780101537704, + -0.030217599123716354, + -0.03448875620961189, + 0.05635650083422661, + 0.0384366475045681, + -0.022873779758810997, + -0.1043723076581955 + ] + }, + { + "type": "NarrativeText", + "element_id": "92ccca74969ed207c2b36cecb1b1b677", + "text": "\"And the fete at the English ambassador's? Today is Wednesday. I must put in an appearance there,\" said the prince. 
\"My daughter is coming for me to take me there.\"", + "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/book-war-and-peace-1p.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + }, + "embeddings": [ + 0.01186622679233551, + 0.08889524638652802, + 0.09668370336294174, + 0.023566391319036484, + 0.08067435771226883, + 0.024664603173732758, + 0.16279205679893494, + -0.050379157066345215, + -0.05566130578517914, + -0.053892429918050766, + -0.0786118358373642, + -0.06395156681537628, + -0.04685981944203377, + -0.04426385462284088, + 0.05116280913352966, + 0.016089895740151405, + -0.00420397287234664, + -0.006044737529009581, + -0.026254111900925636, + 0.024689840152859688, + 0.033023636788129807, + -0.06363378465175629, + 0.02108204923570156, + 0.03757423907518387, + -0.04160229489207268, + 0.010709774680435658, + 0.019345013424754143, + -0.02139284461736679, + -0.027126144617795944, + -0.012032419443130493, + -0.07863090932369232, + -0.0724305510520935, + -0.04903899505734444, + 0.06997495144605637, + -0.012854446657001972, + 0.08667638897895813, + 0.03102255053818226, + -0.06254471093416214, + 0.039497580379247665, + 0.044366054236888885, + 0.00010912181460298598, + -0.059773825109004974, + 0.0144281517714262, + 0.024807928130030632, + -0.0034132753498852253, + -0.017541879788041115, + 0.08461368829011917, + 0.005840926896780729, + 0.008691019378602505, + 0.024143585935235023, + -0.04246649518609047, + -0.004703805316239595, + -0.01601824164390564, + -0.10551297664642334, + -0.020300759002566338, + 0.07980875670909882, + 0.07783841341733932, + -0.06705288589000702, + 0.06003677845001221, + -0.006285214796662331, + -0.052225761115550995, + 0.009230413474142551, + -0.0038820900954306126, + 0.11813557147979736, + -0.012421265244483948, + -0.04964108765125275, + 0.019924268126487732, + -0.006942428182810545, + 
-0.04749898985028267, + -0.009284479543566704, + 0.0021823032293468714, + -0.009434818290174007, + -0.016101660206913948, + -0.03863583877682686, + -0.05542510747909546, + -0.013211006298661232, + 0.04711167886853218, + 0.015680387616157532, + 0.009487555362284184, + 0.027162909507751465, + -0.0265840794891119, + -0.0947253555059433, + -0.02861308678984642, + -0.016398675739765167, + 0.02539748139679432, + -0.05883355066180229, + -0.031065242365002632, + -0.07229422777891159, + -0.011320001445710659, + 0.028573621064424515, + 0.015606793574988842, + -0.05373377725481987, + -0.02842831239104271, + 0.12794798612594604, + 0.00036243401700630784, + -0.021257279440760612, + 0.01217490341514349, + 0.052161503583192825, + -0.011883416213095188, + 0.07691037654876709, + 0.023801153525710106, + 0.1661078780889511, + 0.028950262814760208, + 0.10628699511289597, + -0.08720764517784119, + -0.07669954746961594, + -0.08043921738862991, + -0.006363213993608952, + -0.04886474832892418, + -0.029196573421359062, + -0.02119879052042961, + -0.00895052868872881, + 0.05359954386949539, + -0.002875827020034194, + 0.01099687721580267, + -0.018537666648626328, + 0.0992727279663086, + -0.05339931696653366, + -0.04690808057785034, + -0.04743361845612526, + 0.08799825608730316, + 0.07006954401731491, + -0.03933412581682205, + -0.020580874755978584, + -0.12285801768302917, + -0.014865311793982983, + 0.01774587854743004, + -5.0484382001466955e-33, + -0.03097284585237503, + 0.09441111236810684, + 0.06604082882404327, + 0.037709351629018784, + -0.00632979441434145, + 0.03512584790587425, + -0.032214097678661346, + 0.016415275633335114, + 0.0229373537003994, + -0.043561603873968124, + -0.0333024263381958, + -0.13667026162147522, + -0.03347419574856758, + -0.14437946677207947, + -0.18521352112293243, + 0.06414133310317993, + 0.02752756141126156, + 0.007137614767998457, + 0.049748778343200684, + -0.00906295981258154, + 0.02308010868728161, + 0.08295859396457672, + -0.012124755419790745, + 
-0.004573007579892874, + -0.02541578747332096, + -0.010783474892377853, + 0.08716955780982971, + -0.049526337534189224, + 0.006761755794286728, + 0.02005709335207939, + -0.0041391439735889435, + 0.004545534960925579, + 0.0686500072479248, + -0.06335049867630005, + 0.0464765727519989, + -0.05477474257349968, + -0.009629011154174805, + -0.08248249441385269, + -0.014563496224582195, + -0.0018979687010869384, + -0.02401474490761757, + -0.043907564133405685, + 0.04889107868075371, + 0.0008528442704118788, + -0.01998620666563511, + 0.06498352438211441, + 7.346321126533439e-06, + 0.09102391451597214, + 0.10025796294212341, + 0.03749712184071541, + -0.011586206033825874, + -0.065218485891819, + -0.04918009415268898, + -0.028919637203216553, + 0.014578567817807198, + -0.019936637952923775, + -0.011860670521855354, + 0.03682943433523178, + 0.04523984715342522, + -0.08813267201185226, + 0.06941299140453339, + -0.05927509814500809, + 0.06256064027547836, + 0.05739178508520126, + -0.023394597694277763, + -0.05472741648554802, + -0.04232921078801155, + -0.0034762267023324966, + 0.028198741376399994, + 0.05463911220431328, + -0.04186934977769852, + 0.12883156538009644, + 0.012514886446297169, + 0.024828240275382996, + -0.025926459580659866, + 0.029718393459916115, + -0.008693739771842957, + -0.08170246332883835, + 0.04893605783581734, + 0.05579771846532822, + -0.0408298596739769, + 0.06212972477078438, + 0.03175906091928482, + 0.026697387918829918, + 0.09816807508468628, + -0.06526175141334534, + 0.0019765023607760668, + -0.09599478542804718, + 0.054348140954971313, + 0.09502345323562622, + -0.02476975880563259, + 0.0012054992839694023, + -0.04546326771378517, + -0.06660448014736176, + -0.019665971398353577, + 2.3062543457690713e-33, + 0.05237451568245888, + -0.059928447008132935, + 0.037788163870573044, + 0.08114948868751526, + 6.412396760424599e-05, + -0.05006156489253044, + 0.00802922435104847, + 0.06410728394985199, + 0.04097677022218704, + 0.040287747979164124, + 
-0.030725743621587753, + -0.03717454522848129, + 0.10085520148277283, + -0.0997859537601471, + 0.0014208258362486959, + -0.05191975086927414, + 0.12550191581249237, + -0.06505943089723587, + 0.012739825993776321, + 0.08034616708755493, + -0.029755307361483574, + -0.049432460218667984, + 0.02030804567039013, + -0.013933050446212292, + -0.005743865389376879, + -0.008314126171171665, + 0.029602231457829475, + -0.05950257554650307, + -0.0709964856505394, + -0.044689662754535675, + 0.021866358816623688, + -0.03158329054713249, + -0.048548270016908646, + 0.06772401928901672, + -0.0374302975833416, + 0.06331310421228409, + 0.006817288231104612, + -0.009526335634291172, + 0.06733034551143646, + 0.06829550117254257, + -0.02217542752623558, + -0.09820755571126938, + -0.004189562052488327, + 0.06697063148021698, + 0.003556018229573965, + -0.05099106580018997, + -0.055892929434776306, + 0.04070521518588066, + 0.07824967801570892, + 0.047288861125707626, + -0.06245829910039902, + 0.035065799951553345, + -0.07826393097639084, + -0.0011259856401011348, + 0.012758579105138779, + 0.012832358479499817, + -0.0638238862156868, + -0.041369158774614334, + 0.058213792741298676, + 0.012192139402031898, + -0.03349088877439499, + 0.004410069901496172, + -0.04708581790328026, + -0.07522594183683395, + -0.03232260048389435, + 0.00774815259501338, + -0.04920273646712303, + 0.07864551246166229, + 0.04452921450138092, + 0.06888327747583389, + 0.04066088795661926, + -0.041679497808218, + -0.015532654710114002, + 0.006853767670691013, + -0.012966428883373737, + -0.01915619894862175, + 0.10059624910354614, + -0.035002611577510834, + 0.009413094259798527, + -0.06461603194475174, + -0.0061104376800358295, + 0.021075012162327766, + -0.02513316087424755, + -0.06443876028060913, + 0.07268255203962326, + -0.07528213411569595, + -0.010372313670814037, + 0.04901476204395294, + -0.008247245103120804, + -0.047354672104120255, + 0.003835730953142047, + -0.022347547113895416, + 0.00526357302442193, + 
-0.10191541910171509, + 0.01190889161080122, + -3.435564011056158e-08, + 0.017100509256124496, + -0.011031638830900192, + 0.01067266147583723, + -0.034732624888420105, + -0.04553110525012016, + -0.053697582334280014, + -0.04372187703847885, + -0.061348386108875275, + -0.01088616345077753, + 0.0071554360911250114, + 0.040345799177885056, + 0.021042777225375175, + -0.020796725526452065, + -0.06441261619329453, + 0.03161945939064026, + 0.01229007262736559, + -0.037100180983543396, + -0.045892659574747086, + -0.01625160686671734, + -0.03238838538527489, + -0.033015765249729156, + 0.029694804921746254, + 0.04075215384364128, + 0.01934245228767395, + -0.03307648375630379, + 0.0507473349571228, + 0.02238164283335209, + 0.016980668529868126, + 0.01737947016954422, + -0.0007204401190392673, + 0.0007866905652917922, + 0.028859058395028114, + -0.022456957027316093, + -0.03294429928064346, + -0.017716839909553528, + 0.025146791711449623, + 0.006066915113478899, + -0.02589634247124195, + 0.06328639388084412, + -0.06675468385219574, + 0.04950777068734169, + -0.029641084372997284, + -0.03854209929704666, + 0.055785857141017914, + 0.013056350871920586, + 0.016117503866553307, + -0.03583233058452606, + 0.03434687480330467, + 0.0108940489590168, + -0.015218145214021206, + -0.05202467367053032, + -0.03730126842856407, + 0.07629882544279099, + 0.02770756557583809, + -0.010397711768746376, + 0.06813444942235947, + 0.03388851135969162, + -0.0389065258204937, + 0.03428446874022484, + 0.07747288048267365, + 0.01816493459045887, + 0.02281508967280388, + -0.02696777507662773, + -0.061409395188093185 + ] + }, + { + "type": "NarrativeText", + "element_id": "fc189a009c8fc4507bd42d9601bdb566", + "text": "\"I thought today's fete had been canceled. 
I confess all these festivities and fireworks are becoming wearisome.\"", + "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/book-war-and-peace-1p.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + }, + "embeddings": [ + -0.010653991252183914, + 0.11256983876228333, + 0.12362265586853027, + 0.03349260985851288, + 0.10327772796154022, + 0.04370427876710892, + 0.19034716486930847, + -0.04881719499826431, + -0.04538899287581444, + -0.06825293600559235, + -0.06457121670246124, + 0.03152629733085632, + -0.07903748750686646, + -0.06138636916875839, + 0.05686289444565773, + -0.0451602041721344, + -0.05691823735833168, + -0.04495847225189209, + 0.02689264714717865, + 0.04386984184384346, + 0.05656357854604721, + 0.007857800461351871, + 0.05133030563592911, + 0.08910010010004044, + -0.010225356556475163, + 0.04440247267484665, + -0.061703551560640335, + 0.013806710951030254, + -0.024510063230991364, + 0.03309496492147446, + -0.059246573597192764, + 0.022890130057930946, + -0.011628597974777222, + 0.014852121472358704, + 0.008510775864124298, + -0.046981051564216614, + -0.015184677205979824, + -0.03889314830303192, + 0.020497053861618042, + 0.07892901450395584, + -0.007665036246180534, + -0.08813466131687164, + -0.10797024518251419, + -0.020603053271770477, + -0.054023709148168564, + -0.05281106382608414, + 0.1027015745639801, + -0.06056546792387962, + 0.04975075274705887, + 0.06786341965198517, + 0.02613566815853119, + -0.056518856436014175, + -0.041404612362384796, + -0.12265300005674362, + 0.08926376700401306, + -0.03595895692706108, + 0.10509679466485977, + -0.01466009858995676, + 0.04789545014500618, + -0.0393228605389595, + -0.03919137269258499, + -0.021931413561105728, + -0.0456274189054966, + 0.10790623724460602, + 0.027846312150359154, + 0.0283243078738451, + -0.00028398068388924, + 0.03493307903409004, + 
-0.10772645473480225, + 0.02807561494410038, + 0.02785683050751686, + -0.011980107054114342, + 0.026298217475414276, + -0.02085229381918907, + -0.10088097304105759, + 0.04576760157942772, + 0.049791477620601654, + 0.0024805935099720955, + 0.03908177465200424, + 0.05512986704707146, + -0.018136372789740562, + -0.14219200611114502, + -0.0005620169686153531, + -0.056676097214221954, + 0.05778318643569946, + 0.03216354176402092, + -0.04365905746817589, + -0.0037081625778228045, + -0.04961118847131729, + 0.01768135465681553, + -0.1160094365477562, + 0.023688893765211105, + 0.009782655164599419, + 0.09010545909404755, + 0.0020833611488342285, + 0.045557040721178055, + -0.07638198882341385, + 0.03248213231563568, + 0.008907446637749672, + 0.06373893469572067, + 0.04277603700757027, + 0.0983373150229454, + -0.021791914477944374, + -0.027377618476748466, + -0.009172698482871056, + -0.04652848839759827, + -0.043134454637765884, + -0.0031674739439040422, + -0.033502738922834396, + -0.007962765172123909, + 0.003202742664143443, + -0.05630064010620117, + 0.06203034147620201, + -0.05055928975343704, + -8.3827821072191e-05, + 0.061550404876470566, + 0.06112569198012352, + 0.011851472780108452, + -0.030687876045703888, + 0.00750759756192565, + 0.09114451706409454, + 0.11593954265117645, + 0.05264326184988022, + 0.06592230498790741, + -0.07420974969863892, + -0.039120640605688095, + 0.027092110365629196, + -1.832737486938014e-33, + 0.054607950150966644, + 0.040372781455516815, + 0.039019275456666946, + 0.012410453520715237, + 0.09535899013280869, + -0.007410786114633083, + -0.07205124944448471, + -0.016029100865125656, + 0.008095496334135532, + 0.03479761630296707, + 0.010254926979541779, + -0.02108277752995491, + -0.05064810439944267, + -0.15747486054897308, + -0.018878713250160217, + -0.06597289443016052, + 0.04137654975056648, + 0.009779536165297031, + 0.07300709187984467, + -0.028761887922883034, + 0.03252660110592842, + 0.03329959884285927, + -0.01445112843066454, + 
-0.02721526473760605, + -0.005444565322250128, + -0.015342739410698414, + 0.01951572857797146, + 0.022202400490641594, + -0.03210348263382912, + 0.022600552067160606, + 0.0027476013638079166, + -0.024636538699269295, + 0.13610735535621643, + 0.02838873118162155, + 0.04335245490074158, + -0.01811506226658821, + -0.024809369817376137, + -0.008998882956802845, + -0.03750864043831825, + -0.046918608248233795, + -0.0054541886784136295, + -0.05334155634045601, + -0.032156623899936676, + -0.0518205501139164, + -0.01692485436797142, + 7.55948422010988e-05, + 0.05796927958726883, + 0.11221027374267578, + 0.025499852374196053, + -0.004597608465701342, + 0.04273572564125061, + 0.007748221047222614, + -0.01409420557320118, + -0.0513765886425972, + 0.0468406081199646, + -0.017855526879429817, + -0.02931954525411129, + -0.06956546753644943, + 0.09505333751440048, + -0.05382121726870537, + 0.07914087176322937, + -0.04052388295531273, + 0.04962170869112015, + -0.01852220483124256, + -0.053996674716472626, + -0.007240693084895611, + 0.007706631440669298, + -0.02627011574804783, + -0.005991199053823948, + 0.03503408282995224, + -0.03321618214249611, + 0.013613591901957989, + -0.056492649018764496, + 0.002238046145066619, + -0.02955956570804119, + 0.006167768966406584, + -0.05619194358587265, + -0.05324400216341019, + 0.035702046006917953, + 0.022269580513238907, + 0.04897570610046387, + -0.0896250531077385, + -0.0417652353644371, + 0.023561041802167892, + 0.02442111074924469, + -0.052866946905851364, + 0.03288323059678078, + -0.04734734818339348, + -0.07115747034549713, + 0.06786515563726425, + -0.01833334192633629, + 0.0020864210091531277, + 0.002738386392593384, + 0.01061275601387024, + -0.04479090869426727, + -2.018936572709982e-34, + 0.019463365897536278, + 0.04408648610115051, + -0.051617324352264404, + 0.03904924914240837, + -0.04494861140847206, + -0.05011332780122757, + -0.08227334916591644, + -0.002039300510659814, + -0.045710429549217224, + -0.04292415454983711, + 
0.05907364562153816, + 0.001226853346452117, + 0.05057671293616295, + -0.017013736069202423, + -0.07523750513792038, + -0.009098917245864868, + 0.11158707737922668, + -0.04355854168534279, + 0.053056832402944565, + 0.010513968765735626, + -0.049131546169519424, + 0.0022036773152649403, + -0.021879224106669426, + -0.030561203137040138, + -0.03164594620466232, + 0.021133016794919968, + -0.03621058166027069, + -0.016786785796284676, + -0.027991337701678276, + -0.017979392781853676, + -0.026436887681484222, + -0.055546555668115616, + -0.0606040395796299, + 0.03205908089876175, + 0.054379940032958984, + 0.05133456364274025, + 0.021601837128400803, + -0.07974282652139664, + 0.016569238156080246, + -0.102897047996521, + 0.013447982259094715, + -0.023392414674162865, + -0.04313669353723526, + 0.06333960592746735, + -0.011347893625497818, + -0.001368903904221952, + -0.08862262219190598, + 0.0016136950580403209, + 0.05680185556411743, + 0.08564353734254837, + -0.10816534608602524, + -0.00432585971429944, + -0.10851237922906876, + -0.0036030602641403675, + -0.017638586461544037, + 0.04296039789915085, + -0.058379024267196655, + -0.007408070843666792, + -0.07487191259860992, + 0.02990521490573883, + -0.008162843063473701, + 0.02219122089445591, + 0.02700740285217762, + 0.0072532701306045055, + 0.05926318094134331, + -0.002920248545706272, + -0.005193167366087437, + 0.07360705733299255, + 0.013908849097788334, + 0.09035391360521317, + 0.08343968540430069, + 0.06759883463382721, + -0.13020363450050354, + -0.019805140793323517, + -0.07988034188747406, + -0.012916920706629753, + 0.04695720225572586, + 0.004193378612399101, + -0.021292749792337418, + 0.023584911599755287, + 0.0874580666422844, + 0.01790553703904152, + -0.03438117355108261, + 0.016570962965488434, + 0.0035891402512788773, + 0.025009477511048317, + -0.0332023985683918, + 0.04726262018084526, + -0.03555092588067055, + 0.05733533948659897, + 0.048359498381614685, + -0.0392119437456131, + 0.030850524082779884, + 
-0.010515616275370121, + -0.013820192776620388, + -2.6571370881356415e-08, + 0.0149922426789999, + 0.008288576267659664, + -0.08745520561933517, + -0.08537159115076065, + 0.03529133275151253, + -0.03531583398580551, + -0.033936597406864166, + -0.02114863321185112, + -0.07676295191049576, + -0.01782185770571232, + 0.0823449119925499, + 0.028616508468985558, + 0.06855896860361099, + 0.02617535926401615, + 0.034271787852048874, + 0.04005250334739685, + 0.009074670262634754, + -0.04730065166950226, + -0.0017418661154806614, + 0.014891090802848339, + 0.032187577337026596, + -0.01052862498909235, + -0.03233152627944946, + -0.0033602644689381123, + 0.014156601391732693, + 0.03192257508635521, + -0.027071943506598473, + -0.0595325268805027, + 0.03527631238102913, + 0.07609442621469498, + 0.05349632725119591, + -0.016890928149223328, + -0.06419415771961212, + -0.028087295591831207, + -0.06703958660364151, + -0.015829728916287422, + 0.06796963512897491, + -0.03194468840956688, + 0.06302083283662796, + -0.035554204136133194, + 0.041748493909835815, + 0.007716791704297066, + 0.03737204149365425, + 0.056130941957235336, + -0.023209910839796066, + -0.029660716652870178, + -0.027147408574819565, + 0.019531412050127983, + -0.06499860435724258, + -0.03730242699384689, + 0.03669005259871483, + 0.01603872887790203, + 0.01606561429798603, + 0.023294812068343163, + 0.02865445241332054, + 0.04150641709566116, + 0.0005327324033714831, + 0.06488089263439178, + 0.015284338966012001, + -0.062041379511356354, + 0.04360412061214447, + -0.019565777853131294, + -0.051476504653692245, + -0.10865490138530731 + ] + }, + { + "type": "NarrativeText", + "element_id": "73b0874758fb74535ea6817963e50dc5", + "text": "\"If they had known that you wished it, the entertainment would have been put off,\" said the prince, who, like a wound-up clock, by force of habit said things he did not even wish to be believed.", + "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", + "data_source": { 
+ "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/book-war-and-peace-1p.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + }, + "embeddings": [ + -0.020206326618790627, + 0.12951605021953583, + 0.08377551287412643, + 0.04278159886598587, + 0.03100825846195221, + 0.06985463947057724, + 0.11802545934915543, + -0.006943210959434509, + 0.011949669569730759, + -0.01587284915149212, + -0.051879871636629105, + 0.05673326179385185, + 0.020080959424376488, + -0.07735719531774521, + 0.019916851073503494, + -0.03506137430667877, + 0.003675970481708646, + 0.06225068122148514, + -0.022381918504834175, + 0.11083511263132095, + 0.0470537431538105, + -0.03191846236586571, + 0.015645673498511314, + 0.015919391065835953, + -0.058905452489852905, + 0.035304926335811615, + 0.025768844410777092, + 0.003458860097452998, + -0.03781076893210411, + 0.043060194700956345, + -0.004105893429368734, + -0.04418948292732239, + 0.024985795840620995, + 0.02370474487543106, + -0.03160213306546211, + -0.022401463240385056, + -0.021533971652388573, + 0.0388607420027256, + -0.001812439993955195, + 0.0209333635866642, + 0.039441805332899094, + 0.03592613339424133, + 0.0341956689953804, + 0.04793383553624153, + 0.017480645328760147, + 0.0002709483669605106, + 0.02019421011209488, + -0.04376136511564255, + -0.03002418577671051, + 0.027169259265065193, + -0.04968971014022827, + 0.04730023071169853, + 0.02438121661543846, + -0.15539486706256866, + 0.00837022252380848, + 0.05612447112798691, + 0.04456628859043121, + 0.036300428211688995, + 0.0731128454208374, + 0.07104058563709259, + -0.040172673761844635, + -0.06114160642027855, + 0.07197651267051697, + 0.018502499908208847, + 0.013932773843407631, + 0.05121603235602379, + 0.023403115570545197, + 0.040281690657138824, + -0.057299137115478516, + 0.050135910511016846, + -0.03053690679371357, + 0.0007221802952699363, + 0.015468952246010303, + -0.11082417517900467, + -0.033662132918834686, + 
-0.022141609340906143, + 0.01165127195417881, + -0.08020786195993423, + -0.004598679952323437, + 0.01966540329158306, + -0.05902184545993805, + -0.04507965222001076, + -0.10408807545900345, + -0.006518139969557524, + -0.08404999226331711, + -0.0919281542301178, + 0.09309029579162598, + -0.011402170173823833, + -0.005010843276977539, + -0.024076184257864952, + -0.040520813316106796, + -0.02130957506597042, + -0.08896398544311523, + 0.02861582301557064, + -0.002920785453170538, + 0.08083640038967133, + -0.019954150542616844, + -0.03179210051894188, + -0.0848674476146698, + 0.01757947728037834, + 0.05745571106672287, + 0.04248335584998131, + -0.043452370911836624, + 0.042891304939985275, + -0.04520169645547867, + -0.014572777785360813, + -0.09100909531116486, + -0.017879316583275795, + -0.07227156311273575, + 0.007943677715957165, + -0.04940446838736534, + -0.024981405586004257, + 0.03735654056072235, + 0.011509982869029045, + 0.07993854582309723, + 0.022334929555654526, + 0.009303200989961624, + 0.058241765946149826, + -0.01198912225663662, + -0.02335319109261036, + -0.004346925765275955, + 0.07217642664909363, + 0.02550886943936348, + 0.06027132272720337, + -0.012160533107817173, + -0.06261355429887772, + 0.041800398379564285, + -3.1635870078748983e-33, + 0.03156374767422676, + -0.058215316385030746, + 0.056854382157325745, + -0.05399499833583832, + 0.09429427236318588, + 0.055898118764162064, + -0.041270624846220016, + -0.024190830066800117, + 0.0967966765165329, + -0.007501812186092138, + 0.07606165111064911, + -0.06512582302093506, + -0.07639320194721222, + -0.0965874195098877, + -0.0014413073658943176, + 0.08345616608858109, + -0.013049990870058537, + -0.030703747645020485, + 0.11568804830312729, + -0.06129990145564079, + 0.0451325885951519, + 0.0767102986574173, + -0.0033635853324085474, + -0.0679481029510498, + -0.065433569252491, + 0.026259547099471092, + 0.03339254483580589, + -0.012970264069736004, + 0.034523289650678635, + 0.030729850754141808, + 
0.008083140477538109, + -0.006239081267267466, + 0.07763146609067917, + -0.02493339776992798, + 0.03211633488535881, + -0.0345403216779232, + -0.04336649179458618, + -1.1976413588854484e-05, + -0.012282912619411945, + -0.04358282312750816, + -0.0022352056112140417, + -0.0023887604475021362, + -0.010764000006020069, + -0.04901925101876259, + -0.10290516912937164, + -0.010432811453938484, + 0.0023269590456038713, + 0.012159927748143673, + 0.009796710684895515, + -0.046414390206336975, + 0.07732982933521271, + -0.019159633666276932, + 0.04067341610789299, + -0.1251504272222519, + -0.01659754104912281, + -0.0374470129609108, + -0.02256210893392563, + 0.027769260108470917, + 0.0941220223903656, + -0.0352298878133297, + 0.052800506353378296, + -0.08531499654054642, + 0.05726967751979828, + 0.023679587990045547, + -0.011398563161492348, + 0.038621947169303894, + -0.029250897467136383, + 0.006520786322653294, + -0.062174711376428604, + 0.0010437890887260437, + -0.033238932490348816, + 0.04564684256911278, + -0.08631335943937302, + -0.040670864284038544, + -0.09217272698879242, + 0.01281055063009262, + -0.09887011349201202, + -0.012768123298883438, + -0.0301449503749609, + 0.02322523668408394, + 0.026605388149619102, + -0.046630099415779114, + -0.048457223922014236, + -0.03969937562942505, + 0.06972545385360718, + -0.043720874935388565, + 0.011204211972653866, + -0.08883003145456314, + -0.010241560637950897, + 0.08738932013511658, + -0.0003399127454031259, + 0.03516257554292679, + 0.015185882337391376, + 0.036003123968839645, + -0.07327540218830109, + 1.0332179788107792e-34, + -0.031170424073934555, + -0.048264045268297195, + 0.034741416573524475, + 0.07790860533714294, + 0.037564028054475784, + -0.015025565400719643, + -0.11607640236616135, + -0.03865440562367439, + -0.013813146390020847, + -0.006157978437840939, + -0.0013500230852514505, + -0.05648372322320938, + 0.011533418670296669, + -0.026408104225993156, + -0.04795572906732559, + -0.04658983647823334, + 
0.09586966782808304, + 0.030003126710653305, + -0.06222178786993027, + 0.035325922071933746, + 0.006078779231756926, + -0.0647435113787651, + -0.012039557099342346, + -0.04550516977906227, + 0.06804170459508896, + 0.0059554120525717735, + 0.0031284508295357227, + -0.012635464780032635, + -0.09527383744716644, + -0.002857451094314456, + 0.027784787118434906, + 0.08043225109577179, + -0.04234839603304863, + 0.0663965493440628, + 0.03488648310303688, + -0.006224817596375942, + -0.030352048575878143, + 0.014665749855339527, + -0.02213641069829464, + -0.015464365482330322, + -0.019275495782494545, + -0.07928955554962158, + -0.08202323317527771, + -0.039992451667785645, + 0.008896821178495884, + 0.025985656306147575, + -0.01395951397716999, + 0.06156378611922264, + 0.11064369976520538, + 0.0422767736017704, + -0.043259505182504654, + 0.005848775152117014, + -0.015286476351320744, + -0.008673782460391521, + 0.0036576357670128345, + -0.022587943822145462, + -0.05052908882498741, + 0.008483442477881908, + 0.10826188325881958, + 0.023219691589474678, + -0.02730082906782627, + -0.0413639098405838, + -0.02510647289454937, + -0.07338708639144897, + 0.020329782739281654, + 0.08812791109085083, + 0.036406975239515305, + 0.06843488663434982, + 0.08328747004270554, + -0.03653814643621445, + 0.128536194562912, + 0.02491999976336956, + -0.02418178878724575, + 0.05908017233014107, + -0.021658403798937798, + 0.07286794483661652, + -0.04955882579088211, + -0.05740759149193764, + -0.044852759689092636, + 0.003638384398072958, + 0.028418442234396935, + -0.027394317090511322, + -0.010077469982206821, + -0.028411833569407463, + -0.01770344190299511, + -0.052606165409088135, + -0.02642357349395752, + 0.05938555672764778, + -0.05396811291575432, + 0.0425487719476223, + 0.06250689923763275, + -0.021676789969205856, + 0.01560309249907732, + -0.0015137396985664964, + 0.015357364900410175, + -3.8228396448403146e-08, + -0.049064140766859055, + -0.052037931978702545, + 0.006525189615786076, + 
-0.014914864674210548, + 0.060469288378953934, + 0.03626136854290962, + 0.05240617319941521, + 0.015142379328608513, + 0.0015692722517997026, + -0.013902568258345127, + 0.10088538378477097, + -0.053030140697956085, + 0.00703282980248332, + 0.04286135733127594, + 0.03630499541759491, + 0.027795173227787018, + -0.014182857237756252, + -0.13576540350914001, + 0.014229431748390198, + 0.0261383019387722, + 0.056223057210445404, + -0.017076699063181877, + 0.05786007642745972, + -0.0871492326259613, + -0.07669668644666672, + 0.006748637650161982, + -0.03315608203411102, + -0.024255715310573578, + -0.0055033122189342976, + 0.07801549881696701, + 0.06804454326629639, + -0.09997420012950897, + -0.059346649795770645, + -0.017244858667254448, + -0.022032350301742554, + 0.02899738773703575, + 0.019033927470445633, + -0.021869730204343796, + 0.1061856597661972, + -0.08663272112607956, + 0.05268502235412598, + 0.05580368638038635, + -0.01550976186990738, + 0.060663677752017975, + -0.00181534921284765, + 0.018070874735713005, + -0.035142216831445694, + 0.032141320407390594, + -0.10321121662855148, + 0.013819309882819653, + 0.020531337708234787, + 0.031466737389564514, + 0.04016421362757683, + 0.010569972917437553, + 0.03386285901069641, + 0.018914733082056046, + 0.021683424711227417, + 0.03738854452967644, + -0.17384527623653412, + -0.06058354675769806, + 0.0823649987578392, + 0.015764012932777405, + -0.07598789036273956, + 0.003258800134062767 + ] + }, + { + "type": "NarrativeText", + "element_id": "3b8e76f2baafa3482edb98626c6fd7aa", + "text": "\"Don't tease! Well, and what has been decided about Novosiltsev's dispatch? 
You know everything.\"", + "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/book-war-and-peace-1p.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + }, + "embeddings": [ + -0.03342371806502342, + 0.08513977378606796, + 0.05262714996933937, + 0.05147744342684746, + 0.008624279871582985, + 0.019226713106036186, + 0.11567427217960358, + 0.07488913834095001, + -0.06722360849380493, + -0.04794970527291298, + -0.01104511134326458, + -0.03488222882151604, + -0.05327500030398369, + -0.024843327701091766, + 0.014417257159948349, + -0.06916350871324539, + 0.04852113872766495, + -0.08373995870351791, + 0.04642673581838608, + 0.025284666568040848, + 0.09441138803958893, + -0.0502125546336174, + 0.12197329849004745, + 0.0491221584379673, + 0.03031592257320881, + 0.05504320189356804, + -0.02652476541697979, + 0.03156887739896774, + 0.016133608296513557, + -0.04025481268763542, + -0.06404861807823181, + -0.027360549196600914, + -0.015811851248145103, + -0.0029734415002167225, + -0.02746819332242012, + 0.05845686420798302, + -0.022867092862725258, + -0.034504085779190063, + 0.07089599967002869, + 0.04768503084778786, + -0.015782730653882027, + -0.02368752844631672, + -0.08858262002468109, + 0.10066484659910202, + -0.0631270632147789, + -0.046084821224212646, + 0.025854073464870453, + 0.009028603322803974, + -0.015042530372738838, + 0.0015956687275320292, + -0.05784343183040619, + -0.09996753185987473, + -0.01118563674390316, + -0.037245769053697586, + 0.08959486335515976, + 0.049213606864213943, + 0.02411811798810959, + -0.050853095948696136, + 0.009689879603683949, + 0.00107310782186687, + -0.03301776200532913, + -0.03163949027657509, + -0.021600911393761635, + 0.11823241412639618, + -0.010597150772809982, + -0.020074594765901566, + -0.016708802431821823, + 0.06683234125375748, + -0.08344876766204834, + 0.04243997111916542, + 
-0.0020189739298075438, + -0.03526170924305916, + 0.014384079724550247, + -0.00114669487811625, + -0.016982203349471092, + -0.015549593605101109, + 0.0546073354780674, + 0.028359193354845047, + 0.00841131154447794, + -0.03904729709029198, + 0.0830266997218132, + -0.019280120730400085, + -0.06942664086818695, + 0.004916767124086618, + -0.0011368099367246032, + -0.006244029384106398, + 0.09663219004869461, + -0.10448399186134338, + -0.02323107235133648, + 0.023865215480327606, + -0.05511888861656189, + -0.045337751507759094, + -0.019095124676823616, + 0.01215389370918274, + 0.022195078432559967, + 0.10891315340995789, + -0.08467863500118256, + -0.04017164930701256, + -0.01944127306342125, + 0.008800755254924297, + -0.006557496264576912, + -0.021660959348082542, + -0.0035863593220710754, + 0.03811806067824364, + -0.07967109978199005, + 0.0044114915654063225, + -0.04446791484951973, + -0.1141345202922821, + -0.003104613395407796, + -0.014155152253806591, + -0.033592887222766876, + -0.10191158205270767, + 0.04423771798610687, + -0.016349690034985542, + 0.03867435082793236, + 0.007891174405813217, + -0.0013088756240904331, + 0.0057261111214756966, + -0.08642800152301788, + 0.007591336499899626, + 0.057926226407289505, + -0.06650466471910477, + 0.03430335596203804, + 0.05817409232258797, + -0.07935775071382523, + 0.039846811443567276, + 0.03623029962182045, + -3.5282797221136975e-33, + 0.09347350895404816, + 0.034909263253211975, + -0.047751087695360184, + 0.051433317363262177, + 0.012835186906158924, + -0.014820022508502007, + 0.03752565011382103, + -0.022871077060699463, + -0.052018072456121445, + 0.10732632875442505, + -0.06055246293544769, + -0.045430801808834076, + -0.023168154060840607, + 0.022060366347432137, + -0.042797911912202835, + 0.06273540109395981, + 0.020183417946100235, + 0.020963389426469803, + 0.03245827183127403, + 0.01664109341800213, + 0.08178995549678802, + 0.04709193482995033, + -0.08770083636045456, + -0.060722656548023224, + 0.01126183196902275, 
+ 0.030453650280833244, + 0.0052910842932760715, + -0.008912338875234127, + 0.002271133242174983, + 0.00173093075864017, + -0.0911165103316307, + 0.07851098477840424, + 0.009972491301596165, + -0.005782570224255323, + 0.03086969070136547, + 0.020964287221431732, + -0.07526244223117828, + -0.08612780272960663, + -0.019093092530965805, + -0.008296084590256214, + 0.046879928559064865, + 0.009091437794268131, + -0.06302816420793533, + 0.005970295052975416, + -0.05079472437500954, + 0.011675451882183552, + -0.015259324572980404, + 0.07252682000398636, + 0.04954374581575394, + -0.056695688515901566, + 0.011587142013013363, + 0.051811911165714264, + 0.000887457630597055, + -0.010242966935038567, + 0.08723442256450653, + 0.02765689417719841, + -0.04703427478671074, + -0.015987509861588478, + 0.11399178206920624, + 0.02900473214685917, + -0.01416283193975687, + -0.03893335163593292, + 0.027395004406571388, + -0.019131775945425034, + -0.03436694294214249, + -0.011862356215715408, + -0.0486287996172905, + 0.024184102192521095, + 0.030016092583537102, + 0.037364136427640915, + -0.06791726499795914, + 0.023034995421767235, + -0.10504087805747986, + -0.025439800694584846, + -0.034347549080848694, + -0.024942874908447266, + 0.004088287241756916, + -0.0030367528088390827, + -0.014883828349411488, + -0.09496495872735977, + 0.03237719088792801, + -0.06538836658000946, + 0.028552774339914322, + 0.10747817903757095, + -0.002974304836243391, + -0.05891755595803261, + 0.04938992112874985, + -0.05641869828104973, + -0.038815271109342575, + 0.07755856961011887, + -0.05135307461023331, + -0.018850743770599365, + -0.0470573790371418, + 0.06388959288597107, + -0.05305341258645058, + 3.263669147916885e-34, + 0.05793431028723717, + 0.03429606556892395, + -0.09402097761631012, + 0.013472425751388073, + -0.07261443883180618, + -0.011468240059912205, + -0.0676976665854454, + -0.006202943157404661, + -0.07966677844524384, + -0.05549566075205803, + -0.01921446993947029, + 0.004230210091918707, + 
0.04485609009861946, + -0.04262787103652954, + -0.07189716398715973, + -0.0037762161809951067, + 0.05764886736869812, + -0.0708005279302597, + 0.05060948431491852, + 0.02089093253016472, + 0.02605670504271984, + -0.024572983384132385, + -0.08855784684419632, + 0.023050379008054733, + -0.007019469980150461, + 0.02311650477349758, + 0.06808694452047348, + -3.303098856122233e-05, + -0.16635791957378387, + 0.03618656098842621, + 0.023629216477274895, + -0.07929810136556625, + -0.041956137865781784, + 0.04091061279177666, + 0.06580491364002228, + 0.09183645993471146, + 0.07680416107177734, + -0.06866256892681122, + -0.027831323444843292, + -0.056984759867191315, + 0.036186583340168, + -0.015769045799970627, + 0.013524188660085201, + 0.050546418875455856, + 0.017025062814354897, + 0.0165950208902359, + 0.00011455830099293962, + 0.05044014751911163, + 0.0016380269080400467, + -0.014796826988458633, + 0.036198243498802185, + -0.04374665021896362, + -0.05902782082557678, + -0.016839101910591125, + -0.06848651170730591, + 0.02829565666615963, + 0.03688717260956764, + -0.02206091210246086, + 0.0797087550163269, + -0.020057978108525276, + -0.0042294892482459545, + 0.037745896726846695, + 0.007963243871927261, + 0.008670471608638763, + 0.018695693463087082, + -0.05416543409228325, + -0.00802434328943491, + -0.04578344151377678, + 0.08465138077735901, + -0.007806604262441397, + 0.013992395251989365, + -0.023009581491351128, + -0.020857159048318863, + -0.047529466450214386, + -0.0401642732322216, + 0.051874078810214996, + 0.0012261841911822557, + -0.14704668521881104, + -0.00478216540068388, + 0.032204627990722656, + 0.02896144799888134, + -0.017328917980194092, + 0.00520895142108202, + 0.039198826998472214, + 0.038682401180267334, + 0.16185303032398224, + -0.0033375828061252832, + 0.07442822307348251, + 0.013632474467158318, + 0.010453831404447556, + 0.03932604938745499, + -0.04941476136445999, + 0.026289017871022224, + -0.034113794565200806, + 0.027384210377931595, + 
-2.633965046072717e-08, + 0.026281647384166718, + 0.049991294741630554, + -0.014689185656607151, + -0.009161656722426414, + 0.04411310702562332, + 0.023217037320137024, + -0.12026069313287735, + 0.0061365594156086445, + -0.04010029882192612, + -0.0341937355697155, + 0.05503888800740242, + 0.03386237844824791, + 0.07241027802228928, + 0.029679682105779648, + 0.09800054132938385, + 0.07632961869239807, + 0.03416607528924942, + 0.06656735390424728, + -0.07427140325307846, + -0.024487460032105446, + -0.06688807159662247, + 0.027105404064059258, + 0.002868437208235264, + -0.07122628390789032, + -0.0083407461643219, + 0.01354574877768755, + 0.08080471307039261, + 0.001469059963710606, + 0.05829135701060295, + 0.00012745715503115207, + 0.03562508895993233, + -0.01796681620180607, + -0.02497522532939911, + -0.008376874029636383, + 0.020359264686703682, + 0.09904240071773529, + 0.08526667952537537, + -0.011286646127700806, + 0.13558287918567657, + -0.10274514555931091, + -0.02530268207192421, + 0.051285918802022934, + 0.03121301159262657, + 0.03983765095472336, + -0.12721706926822662, + 0.022358793765306473, + -0.07814673334360123, + -0.06490527093410492, + -0.01744559034705162, + -0.0017374102026224136, + 0.02637677825987339, + -0.024805519729852676, + -0.05245833471417427, + 0.06862308084964752, + 0.011838816106319427, + -0.0129818394780159, + -0.06295140832662582, + -0.026889150962233543, + -0.020131289958953857, + -0.059935152530670166, + 0.0318981409072876, + -0.006793226581066847, + -0.003658923553302884, + -0.019297689199447632 + ] + }, + { + "type": "NarrativeText", + "element_id": "fbc14cba30b1dc3c20bd0bcbb36d7de5", + "text": "\"What can one say about it?\" replied the prince in a cold, listless tone. \"What has been decided? 
They have decided that Buonaparte has burnt his boats, and I believe that we are ready to burn ours.\"", + "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/book-war-and-peace-1p.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + }, + "embeddings": [ + -0.03062039241194725, + 0.11658511310815811, + 0.06840557605028152, + 0.03385869413614273, + -0.011450362391769886, + 0.06567823141813278, + 0.10147763043642044, + -0.02784118615090847, + -0.10256844013929367, + -0.09672968834638596, + -0.11508270353078842, + -0.03473297134041786, + -0.0075867921113967896, + -0.09611301869153976, + 0.018453530967235565, + 0.03379557654261589, + -0.0036427362356334925, + -0.052353810518980026, + -0.047805409878492355, + 0.0854685977101326, + 0.059437792748212814, + 0.058452799916267395, + -0.010093247517943382, + 0.091165691614151, + 0.011204486712813377, + 0.04561157897114754, + 0.045512374490499496, + 0.026107633486390114, + -0.044079866260290146, + 0.02090480364859104, + -0.1036195158958435, + -0.006478417664766312, + -0.004234401974827051, + 0.03857743367552757, + 0.01971903070807457, + -0.01106950081884861, + 0.017892343923449516, + 0.0118138762190938, + -0.011747496202588081, + 0.014486816711723804, + 0.011824647895991802, + -0.024226056411862373, + -0.01933845691382885, + 0.08651664853096008, + 0.006315367761999369, + -0.04796777665615082, + -0.03175467997789383, + -0.03359665721654892, + 0.026989929378032684, + 0.028787745162844658, + -0.0037169456481933594, + 0.0373343899846077, + -0.016810541972517967, + -0.11789627373218536, + 0.005820679012686014, + 0.02434415929019451, + 0.00841984711587429, + -0.0031512014102190733, + 0.09586744755506516, + -0.04183382913470268, + 0.0056878929026424885, + -0.00699774781242013, + 0.01651979796588421, + 0.08096307516098022, + 0.03017777018249035, + -0.014325308613479137, + 0.013215739279985428, 
+ 0.027095019817352295, + -0.1017422005534172, + 0.02287573181092739, + 0.09313109517097473, + 0.007237189915031195, + 0.02488630823791027, + -0.03306378796696663, + -0.08371336758136749, + -0.0793229416012764, + 0.04488971084356308, + -0.03888683766126633, + 0.019242843613028526, + 0.0097972946241498, + -0.017630834132432938, + -0.13485994935035706, + -0.10429353266954422, + 0.020150844007730484, + -0.034101977944374084, + -0.02568933553993702, + 0.07359836250543594, + -0.03787151724100113, + -0.04844767972826958, + 0.03811434656381607, + 0.009081250056624413, + -0.04421187937259674, + -0.0234221201390028, + 0.04396011307835579, + -0.04531798139214516, + 0.1374182105064392, + -0.005524445325136185, + 0.01614019274711609, + 0.025496289134025574, + -0.007060116156935692, + 0.005493327509611845, + 0.03996917977929115, + 0.005201163236051798, + -0.008771810680627823, + -0.07763214409351349, + -0.019097669050097466, + -0.027698978781700134, + -0.031971175223588943, + -0.03836334869265556, + -0.11884204298257828, + -0.04141640663146973, + -0.09703481197357178, + 0.04636285826563835, + -0.012623785994946957, + 0.0074942526407539845, + 0.0012106818612664938, + 0.026815561577677727, + -0.024071475490927696, + -0.10793618112802505, + -0.030899008736014366, + 0.037830423563718796, + 0.05862521007657051, + -0.07164493203163147, + 0.06962022185325623, + -0.03713823854923248, + -0.0009952762629836798, + 0.013093167915940285, + -8.433771564386664e-34, + 0.07010415196418762, + 0.05530248209834099, + 0.05645710229873657, + 0.07233108580112457, + 0.0410662516951561, + 0.020495254546403885, + -0.07546685636043549, + 0.009956919588148594, + 0.004133446142077446, + -0.0011218525469303131, + 0.015150350518524647, + -0.023769505321979523, + -0.04326249286532402, + -0.09433013200759888, + -0.02236325852572918, + -0.042428892105817795, + -0.06810398399829865, + -0.034027501940727234, + 0.01406354270875454, + -0.08960604667663574, + 0.03640615567564964, + 0.1341436356306076, + 
-0.03256399556994438, + -0.010535534471273422, + -0.06925470381975174, + 0.0014241498429328203, + 0.05509605631232262, + -0.043437689542770386, + -0.009649509564042091, + 0.05104508996009827, + -0.056296057999134064, + 0.02744130976498127, + 0.01539466343820095, + -0.07102219760417938, + -0.010381119325757027, + -0.00637851795181632, + -0.08509370684623718, + 0.004385745618492365, + -0.09540271013975143, + -0.022966820746660233, + -0.023332182317972183, + -0.0031879469752311707, + -0.044991519302129745, + 0.08684410154819489, + -0.001085872296243906, + -0.07022510468959808, + -0.07152993232011795, + 0.06745175272226334, + 0.02616734430193901, + 0.0028156822081655264, + 0.017324162647128105, + -0.013740797527134418, + 0.03159443289041519, + 0.015321961604058743, + 0.09027786552906036, + 0.0023796958848834038, + -0.0026804290246218443, + -0.004132778383791447, + 0.0726153701543808, + -0.02088511921465397, + 0.1340552419424057, + -0.013763670809566975, + 0.018531644716858864, + -0.005064093042165041, + 0.017734451219439507, + 0.034849487245082855, + -0.051738325506448746, + 0.021430157124996185, + 0.03340969607234001, + -0.09922833740711212, + -0.021377170458436012, + 0.04216053709387779, + -0.027194524183869362, + 0.023806676268577576, + -0.06812924891710281, + 0.016043057665228844, + -0.01036363746970892, + -0.002622612053528428, + -0.07356513291597366, + -0.029667677357792854, + -0.06297769397497177, + -0.07508223503828049, + 0.031849127262830734, + 0.0563737154006958, + 0.05295384302735329, + -0.040851205587387085, + 0.013757179491221905, + -0.07462408393621445, + 0.023779187351465225, + 0.09099095314741135, + -0.07036614418029785, + 0.043479226529598236, + 0.030034013092517853, + -0.006183300632983446, + -0.06637344509363174, + -1.1228162245291175e-33, + 0.009592457674443722, + -0.013606715947389603, + 0.028936179354786873, + 0.10430671274662018, + -0.06853877753019333, + -0.0721086710691452, + -0.07098358869552612, + 0.02395092323422432, + -0.021944684907794, + 
-0.048914264887571335, + -0.02097492478787899, + -0.043649233877658844, + 0.1029880940914154, + -0.050399694591760635, + -0.010445715859532356, + -0.015686966478824615, + 0.09286479651927948, + -0.006173861678689718, + -0.009836435317993164, + 0.017719438299536705, + -0.028635594993829727, + -0.05707540735602379, + -0.04548688232898712, + -0.026321787387132645, + 0.01986899971961975, + 0.05991179496049881, + 0.02989157661795616, + -0.11091004312038422, + -0.056568242609500885, + -0.028803039342164993, + 0.043210677802562714, + -0.01891515776515007, + -0.029463425278663635, + 0.02891312539577484, + 0.020163564011454582, + 0.04694231599569321, + 0.08859184384346008, + -0.04014614596962929, + 0.030233383178710938, + -0.012781520374119282, + 0.03141099959611893, + -0.10608061403036118, + -0.10153605788946152, + 0.03928650915622711, + -0.004890939220786095, + -0.016299152746796608, + 0.03805616497993469, + 0.027883319184184074, + 0.0879654809832573, + 0.060353897511959076, + 0.022316718474030495, + -0.0018401318229734898, + -0.019650056958198547, + 0.01133913453668356, + -0.011818090453743935, + -0.05419110879302025, + -0.03173195570707321, + -0.03331926837563515, + 0.08423365652561188, + 0.01332488376647234, + 0.0013668957399204373, + 0.033095236867666245, + 0.01751074008643627, + 0.0014572051586583257, + 0.023784466087818146, + 0.10100594907999039, + 0.020519444718956947, + 0.053452908992767334, + 0.1083199754357338, + -0.03295895457267761, + 0.004178298637270927, + -0.05156240612268448, + -0.08082640171051025, + 0.05194802209734917, + 0.0404670387506485, + 0.04691654443740845, + -0.024723296985030174, + -0.12952785193920135, + -0.0692090317606926, + 0.034258175641298294, + 0.038761869072914124, + -0.027142910286784172, + -0.05694182589650154, + 0.0025772631634026766, + 0.09436209499835968, + -0.08792443573474884, + 0.0055595519952476025, + -0.006851444486528635, + -0.0009211327997036278, + 0.0220797061920166, + 0.011961840093135834, + -0.06731319427490234, + 
0.007805218454450369, + -0.05029301717877388, + 0.018926553428173065, + -3.56512011023824e-08, + 0.008370205760002136, + -0.01577330194413662, + -0.024207470938563347, + 0.007023866754025221, + 0.055150050669908524, + -0.010496317408978939, + -0.021936075761914253, + -0.020491894334554672, + -0.012689801864326, + -0.022893982008099556, + 0.06425684690475464, + 0.0027127990033477545, + 0.004561522975564003, + -0.005903051234781742, + 0.04811227694153786, + 0.06165698170661926, + 0.06413770467042923, + -0.1156829223036766, + -0.03321155905723572, + -0.07866071164608002, + -0.010193978436291218, + -0.0075994981452822685, + -0.0036520850844681263, + 0.009702233597636223, + -0.04661893472075462, + 0.034715648740530014, + -0.0009251561132259667, + 0.04593962803483009, + 0.03481017425656319, + 0.0205642431974411, + 0.043307654559612274, + -0.010698848403990269, + -0.07941724359989166, + 0.025380361825227737, + 0.030375370755791664, + 0.008141186088323593, + 0.01681917905807495, + 0.04162430763244629, + 0.09563540667295456, + -0.12796121835708618, + -0.03156564384698868, + 0.10483843088150024, + -0.04920373111963272, + 0.06143995374441147, + 0.03261723741889, + -0.027665095403790474, + -0.0672636553645134, + 0.05920660123229027, + -0.0777386948466301, + -0.013844474218785763, + -0.0027378113009035587, + -0.05468808114528656, + 0.10512180626392365, + 0.036075714975595474, + 0.0031894007697701454, + 0.06391051411628723, + 0.03540597856044769, + 0.04482096806168556, + -0.04115288704633713, + 0.004169686231762171, + 0.04572827368974686, + 0.020074985921382904, + -0.01063019223511219, + -0.00033094469108618796 + ] + } +] \ No newline at end of file diff --git a/test_unstructured_ingest/expected-structured-output/embed/book-war-and-peace-1p.txt.json b/test_unstructured_ingest/expected-structured-output/embed/book-war-and-peace-1p.txt.json deleted file mode 100644 index 93c6f271b3..0000000000 --- 
a/test_unstructured_ingest/expected-structured-output/embed/book-war-and-peace-1p.txt.json +++ /dev/null @@ -1,5267 +0,0 @@ -[ - { - "type": "Title", - "element_id": "ecc6ecfdda0975d91546edf1cd407e86", - "text": "CHAPTER I", - "metadata": { - "data_source": { - "url": "example-docs/book-war-and-peace-1p.txt", - "permissions_data": [ - { - "mode": 33188 - } - ] - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] - }, - "embeddings": [ - -0.03905358910560608, - 0.06777486950159073, - 0.010958918370306492, - 0.0012426828034222126, - -0.013691974803805351, - 0.055743373930454254, - 0.11454292386770248, - 0.02517000585794449, - -0.007307224906980991, - -0.04347857087850571, - 0.015188234858214855, - -0.0008902765694074333, - 0.08180297166109085, - -0.04100024327635765, - -0.014650953933596611, - -0.04903419315814972, - -0.010637544095516205, - -0.048637472093105316, - -0.055734604597091675, - -0.021019726991653442, - -0.0201592855155468, - 0.08178041130304337, - 0.02293364331126213, - 0.02327793277800083, - -0.06536834686994553, - 0.004294208716601133, - 0.0013802761677652597, - -0.016765661537647247, - -0.03942941874265671, - -0.16493840515613556, - -0.001806008629500866, - 0.04371352866292, - -0.030743751674890518, - -0.03578189015388489, - 0.007972977124154568, - -0.01052060816437006, - 0.08600007742643356, - 0.0008449788438156247, - 0.06916183233261108, - 0.011546945199370384, - -0.04101349785923958, - -0.024453941732645035, - -0.008781857788562775, - -0.0012528117513284087, - 0.032106149941682816, - -0.05179743468761444, - 0.020187513902783394, - 0.010731163434684277, - 0.03549065813422203, - -0.006568705663084984, - -0.012302322313189507, - 0.015625350177288055, - -0.07307184487581253, - 0.05225842818617821, - 0.005438330117613077, - 0.10789937525987625, - 0.029409080743789673, - -0.01955241896212101, - 0.060391370207071304, - 0.024580294266343117, - 0.0066605983301997185, - 0.014496334828436375, - -0.059024371206760406, - 0.018884165212512016, - 
0.05368296802043915, - -0.04883323982357979, - -0.03478896617889404, - -0.03594771400094032, - -0.029253903776407242, - -0.005442366003990173, - -0.05627906322479248, - 0.0055951629765331745, - -0.0653470829129219, - -0.0659431666135788, - -0.023416602984070778, - 0.008515076711773872, - 0.05723069608211517, - -0.02647349238395691, - 0.08162933588027954, - -0.08124540001153946, - -0.08059797435998917, - -0.026992209255695343, - 0.008641927503049374, - -0.05588875338435173, - -0.009593095630407333, - 0.08471686393022537, - 0.012107343412935734, - -0.02897532284259796, - -0.0074949306435883045, - 0.03564765304327011, - -0.04704892262816429, - -0.07886863499879837, - 0.09840919077396393, - 0.006313791498541832, - -0.05892647057771683, - 0.041811030358076096, - 0.005335912108421326, - -0.016099615022540092, - 0.050923049449920654, - 0.2253960818052292, - -0.00611790269613266, - -0.029893936589360237, - 0.006088017486035824, - -0.039472199976444244, - -0.08597660809755325, - -0.027387086302042007, - -0.031985603272914886, - -0.04598861187696457, - -0.014253798872232437, - -0.04349174350500107, - -0.06363194435834885, - -0.034045107662677765, - -0.0076544932089746, - 0.0038880177307873964, - 0.06429541110992432, - 0.016424594447016716, - 0.13503167033195496, - 0.020152702927589417, - 0.054559506475925446, - 0.012581389397382736, - 0.03185432776808739, - 0.026501024141907692, - 0.018824053928256035, - 0.10952351242303848, - -0.08162879943847656, - -0.03960675001144409, - -0.0032470286823809147, - -4.269091323023273e-33, - 0.06185511127114296, - -0.08925726264715195, - 0.013858241960406303, - 0.014075211249291897, - 0.04526887461543083, - 0.0024966951459646225, - 0.019853880628943443, - 0.040941022336483, - -0.03524932265281677, - 4.289705248083919e-05, - -0.06601123511791229, - -0.003698268672451377, - -0.06800546497106552, - -0.0440298356115818, - 0.057263102382421494, - -0.0044755651615560055, - 0.018038611859083176, - 0.005330389831215143, - -0.09185048192739487, - 
-0.012726654298603535, - 0.037934836000204086, - 0.03344470635056496, - 0.03331831842660904, - -0.060712166130542755, - -0.013047892600297928, - 0.020851636305451393, - -0.004250632598996162, - -0.058109987527132034, - -0.015378139913082123, - 0.07314843684434891, - 0.03095244988799095, - 0.013710983097553253, - -0.05411076545715332, - -0.12046772986650467, - -0.02648378536105156, - -0.013706093654036522, - -0.009664993733167648, - -0.02770247496664524, - 0.06314343959093094, - 0.019671471789479256, - -0.08084971457719803, - -0.0036577696446329355, - 0.017928779125213623, - 0.030036229640245438, - 0.04298318177461624, - 0.08673416078090668, - 0.13455122709274292, - 0.027358200401067734, - -0.03258578106760979, - -0.010898889042437077, - -0.007018843665719032, - -0.028333114460110664, - -0.02667233720421791, - 0.022378353402018547, - -0.005776816513389349, - 0.02410120703279972, - -0.04932825639843941, - -0.007980617694556713, - 0.021779408678412437, - 0.05339276045560837, - 0.048314474523067474, - 0.10402950644493103, - 0.00886040460318327, - 0.037524979561567307, - -0.07240667194128036, - -0.00011428128345869482, - 0.02017560414969921, - -0.06184634938836098, - 0.05961460620164871, - -0.05503390356898308, - -0.16565214097499847, - -0.02770734205842018, - 0.09743264317512512, - -0.00033640116453170776, - 0.05654434487223625, - -0.051994431763887405, - -0.0370136983692646, - 0.05928681045770645, - -0.1309632658958435, - 0.01619136705994606, - -0.05736489221453667, - -0.038294464349746704, - -0.0764356330037117, - 0.03953959420323372, - 0.05474060773849487, - -0.003197955898940563, - 0.0578320287168026, - -0.05534062162041664, - -0.006174840033054352, - -0.06396003067493439, - 0.03695429489016533, - -0.0048160552978515625, - 0.08334645628929138, - 0.040309589356184006, - 0.004848717246204615, - 1.5468019758493467e-33, - -0.010510802268981934, - -0.11877680569887161, - -0.02399861067533493, - -0.045038871467113495, - 0.019606271758675575, - -0.018350496888160706, - 
-0.026684321463108063, - -0.010227969847619534, - -0.06658003479242325, - 0.014446602202951908, - -0.10882732272148132, - 0.01711495779454708, - 0.01623862236738205, - 0.06407270580530167, - 0.08058272302150726, - 0.019555656239390373, - 0.04779680818319321, - -0.013917267322540283, - -0.04839387536048889, - -0.0061320895329117775, - -0.023716766387224197, - -0.010434459894895554, - -0.007593490183353424, - -0.04394460842013359, - 0.024425795301795006, - 0.0004080524086020887, - 0.02652953378856182, - 0.03431902080774307, - -0.005246715620160103, - 0.03659747168421745, - 0.016706889495253563, - -0.03680451959371567, - -0.04313862696290016, - -0.005520129110664129, - -0.029771938920021057, - 0.009256012737751007, - 0.03093816339969635, - -0.038504909723997116, - 0.0071301707066595554, - 0.002536887302994728, - 0.08790997415781021, - -0.03878892958164215, - 0.0192886870354414, - 0.0923716202378273, - -0.017960842698812485, - -0.015640700235962868, - 0.07834357023239136, - 0.03496072068810463, - -0.05659980699419975, - 0.04549147188663483, - -0.06022792309522629, - -0.07567069679498672, - 0.043599072843790054, - -0.08339101076126099, - 0.013377152383327484, - 0.022528696805238724, - -0.02521531470119953, - -0.024786753579974174, - 0.0212554968893528, - 0.03799212723970413, - -0.01698782667517662, - 0.062367599457502365, - -0.07540280371904373, - 0.08831459283828735, - 0.06627689301967621, - -0.06869129836559296, - -0.03236827626824379, - -0.0842338427901268, - 0.03145937994122505, - 0.032238349318504333, - -0.06467290222644806, - -0.04131615534424782, - 0.008703738451004028, - 0.07175716012716293, - 0.01768484339118004, - -0.028763415291905403, - -0.11189690232276917, - -0.006346534471958876, - -0.1058584526181221, - -0.0904374048113823, - -0.08279415965080261, - -0.04315422847867012, - -0.032436393201351166, - 0.034681275486946106, - 0.03060273453593254, - -0.013560059480369091, - 0.09569396078586578, - 0.001019880292005837, - 0.045048344880342484, - 
0.009828394278883934, - 0.020691972225904465, - 0.05328556150197983, - 0.056311678141355515, - 0.0005694369319826365, - 0.027328871190547943, - -1.4383411794938183e-08, - 0.01674884371459484, - 0.06547889113426208, - 0.07251793891191483, - 0.034017693251371384, - 0.016718940809369087, - 0.02763294242322445, - -0.018502671271562576, - 0.04206664860248566, - -0.024970639497041702, - 0.05260658636689186, - 0.0556945838034153, - 0.04880279675126076, - 0.1056448370218277, - 0.056813839823007584, - 0.01710408180952072, - 0.07104698568582535, - -0.0012886695330962539, - -0.008612032048404217, - -0.02639884315431118, - -0.027114171534776688, - 0.052976831793785095, - 0.013118532486259937, - 0.019299853593111038, - -0.10220217704772949, - -0.027542224153876305, - 0.027875347062945366, - -0.03622899577021599, - 0.10509008914232254, - -0.019843464717268944, - 0.030714355409145355, - 0.02683083899319172, - 0.07362940162420273, - -0.125151589512825, - -0.03394262120127678, - -0.008359977975487709, - 0.030580008402466774, - 0.016863683238625526, - 0.017872517928481102, - -0.0002850913442671299, - -0.09362588822841644, - 0.02057875506579876, - -0.006355494260787964, - 0.07356219738721848, - 0.019790589809417725, - 0.002965210471302271, - 0.024464452639222145, - 0.009671531617641449, - 0.01640116423368454, - 0.04246217757463455, - -0.06015724316239357, - -0.07367906719446182, - 0.03363675996661186, - 0.02566550299525261, - 0.07107985764741898, - 0.01328246109187603, - 0.05232810601592064, - -0.004840012174099684, - -0.025187980383634567, - -0.07005804032087326, - -0.011108207516372204, - 0.1163470447063446, - -0.035843148827552795, - -0.06202436238527298, - -0.08774128556251526 - ] - }, - { - "type": "NarrativeText", - "element_id": "9ab2c6fc3fff6cedc83ffce2ffcc8705", - "text": "\"Well, Prince, so Genoa and Lucca are now just family estates of the Buonapartes. 
But I warn you, if you don't tell me that this means war, if you still try to defend the infamies and horrors perpetrated by that Antichrist--I really believe he is Antichrist--I will have nothing more to do with you and you are no longer my friend, no longer my 'faithful slave,' as you call yourself! But how do you do? I see I have frightened you--sit down and tell me all the news.\"", - "metadata": { - "data_source": { - "url": "example-docs/book-war-and-peace-1p.txt", - "permissions_data": [ - { - "mode": 33188 - } - ] - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] - }, - "embeddings": [ - -0.05874713510274887, - 0.10656480491161346, - 0.030508864670991898, - -0.02482161857187748, - -0.02444542944431305, - 0.06009218096733093, - 0.05141943693161011, - -0.08806687593460083, - -0.06982985138893127, - -0.10064216703176498, - -0.0504942424595356, - -0.08565101772546768, - -0.001889770501293242, - -0.0606808066368103, - -0.07801131159067154, - 0.022434258833527565, - -0.021279962733387947, - 0.013246458023786545, - 0.007391566410660744, - 0.15346579253673553, - 0.010216261260211468, - -0.037326764315366745, - 0.029132187366485596, - 0.1313096582889557, - 0.033914919942617416, - 0.07146814465522766, - 0.06141147390007973, - 0.06220635026693344, - -0.038903702050447464, - 0.02126525714993477, - 0.024463621899485588, - -0.008229895494878292, - 0.001965218223631382, - -0.02225165255367756, - 0.04346250370144844, - -0.024317467585206032, - 0.06304802000522614, - -0.0032200238201767206, - 0.040856990963220596, - -0.06138285994529724, - 0.07564108073711395, - 0.03557411581277847, - 0.05523442476987839, - 0.12190321832895279, - 0.0015077595598995686, - -0.005630569998174906, - -0.01286593358963728, - 0.047433607280254364, - 0.0345277264714241, - -0.07475457340478897, - -0.0702810287475586, - -0.03263528272509575, - 0.009802822023630142, - -0.03040664829313755, - -0.07674533873796463, - 0.12278788536787033, - 0.006970113608986139, - -0.04915519431233406, - 
0.03230420872569084, - -0.008575426414608955, - -0.008731342852115631, - 0.06175641715526581, - 0.021360445767641068, - 0.06800870597362518, - -0.05496225133538246, - 0.03136273846030235, - -0.0031888047233223915, - 0.09498443454504013, - -0.15730290114879608, - 0.08546385169029236, - 0.05568833649158478, - -0.01873161271214485, - 0.031843557953834534, - -0.03811875730752945, - -0.03217019885778427, - -0.03334486484527588, - 0.05631453916430473, - -0.06807487457990646, - 0.012875628657639027, - -0.05491160973906517, - -0.009411957114934921, - -0.06047649681568146, - -0.005097203887999058, - 0.03667798638343811, - -0.01735915057361126, - -0.043124932795763016, - 0.13297702372074127, - -0.05144325643777847, - 0.07867272198200226, - 0.004182095639407635, - 0.011900887824594975, - 0.012927809730172157, - 0.01813146099448204, - 0.0515093058347702, - -0.03438986837863922, - 0.03820415958762169, - 0.005536300130188465, - 0.10068914294242859, - -0.0652921050786972, - 0.04977913945913315, - 0.004730586893856525, - -0.05184308439493179, - 0.0514308363199234, - 0.013343083672225475, - -0.0672304704785347, - -0.002193532884120941, - -0.037461236119270325, - 0.0004999084048904479, - -0.004775757901370525, - -0.08317892998456955, - 0.00939152855426073, - -0.0628887414932251, - -0.019432302564382553, - -0.020009029656648636, - -0.02738085947930813, - 0.04080546274781227, - 0.09818453341722488, - 0.004701980389654636, - -0.060363538563251495, - -0.0030813536141067743, - 0.04463755339384079, - 0.0366981066763401, - -0.054774828255176544, - 0.0970960333943367, - 0.0022707213647663593, - -0.019855156540870667, - -0.04361123591661453, - -2.5647978997477024e-33, - -0.005203988403081894, - -0.034174855798482895, - -0.00035177735844627023, - 0.03426545113325119, - -0.0352540984749794, - 0.034352824091911316, - -0.07474204152822495, - -0.01232660561800003, - -0.057859133929014206, - -0.048323020339012146, - -0.024462519213557243, - -0.007020973600447178, - -0.03689667955040932, - 
0.04385479539632797, - -0.03327155113220215, - 0.042445868253707886, - -0.058089304715394974, - -0.02207815647125244, - 0.027183188125491142, - -0.07541300356388092, - 0.032816532999277115, - 0.0015269487630575895, - -0.06739698350429535, - -0.0010412667179480195, - 0.01954491063952446, - 0.03010645881295204, - 0.0841066986322403, - 0.02962297946214676, - 0.04990168288350105, - 0.05803744122385979, - 0.027960434556007385, - 0.035487402230501175, - 0.04623205214738846, - -0.04980507865548134, - 0.016254926100373268, - 0.0800669938325882, - -0.07473579794168472, - -0.027176083996891975, - -0.05857780575752258, - -0.01857825741171837, - -0.04880596324801445, - 0.03256797045469284, - -0.08259384334087372, - 0.023284856230020523, - 0.0002488868485670537, - -0.12520954012870789, - 0.017054608091711998, - -0.015016733668744564, - 0.026285890489816666, - -0.003427292685955763, - 0.011149115860462189, - -0.07744865864515305, - -0.06510679423809052, - 0.05469556897878647, - 0.026935163885354996, - -0.02860243432223797, - -0.10690241307020187, - 0.07598370313644409, - 0.047793902456760406, - -0.06476540863513947, - 0.08826083689928055, - -0.05912715196609497, - 0.02452665939927101, - 0.09490557760000229, - -0.013721363618969917, - -0.029740121215581894, - 0.000406311999540776, - 0.0674629658460617, - -0.0030368876177817583, - -0.07345457375049591, - -0.027896622195839882, - -0.06864511221647263, - 0.009670533239841461, - 0.01082942821085453, - -0.059951506555080414, - -0.0016298461705446243, - -0.019092809408903122, - 0.0070182811468839645, - -0.032586731016635895, - -0.03575167432427406, - -0.039596930146217346, - -0.0037336305249482393, - 0.03977549821138382, - 0.07186190038919449, - 0.055892568081617355, - -0.018120326101779938, - 0.07676933705806732, - -0.018220890313386917, - -0.014926326461136341, - 0.047694385051727295, - -0.03230489790439606, - 0.0400506928563118, - 0.05335370823740959, - -0.02887483313679695, - -0.12175753712654114, - -5.007494813552594e-34, - 
0.005681969691067934, - -0.05102137103676796, - 0.04206482693552971, - -0.014440404251217842, - -0.119805708527565, - -0.04997745156288147, - -0.02830345183610916, - 0.031310953199863434, - 0.04197292402386665, - 0.009160518646240234, - -0.06150886416435242, - -0.010343320667743683, - 0.060277536511421204, - -0.06173500418663025, - 0.015110856853425503, - -0.004956216551363468, - 0.1211816817522049, - 0.02008718065917492, - -0.07931523025035858, - -0.019512414932250977, - -0.05346916243433952, - 0.006304190028458834, - -0.05663381889462471, - 0.026090426370501518, - 0.007936684414744377, - -0.012739484198391438, - 0.08506488054990768, - 0.026506898924708366, - -0.032519347965717316, - -0.09956459701061249, - -0.02356145717203617, - -0.013485168106853962, - -0.055348414927721024, - 0.0048079416155815125, - 0.010034901089966297, - 0.08030771464109421, - 0.0405142605304718, - -0.0031254023779183626, - 0.049714136868715286, - -0.031183136627078056, - -0.052488140761852264, - -0.05640411376953125, - -0.10136307030916214, - -0.011093135923147202, - -0.06251667439937592, - 0.03959471732378006, - 0.06921856105327606, - 0.008909936994314194, - 0.04276518151164055, - 0.008871694095432758, - -0.0068938639014959335, - -0.03439166024327278, - -0.011620834469795227, - -0.027362685650587082, - -0.043699055910110474, - -0.12517930567264557, - -0.03718714416027069, - -0.04610684514045715, - -0.015714477747678757, - -0.014186370186507702, - 0.01614244468510151, - 0.030985984951257706, - -0.02919776923954487, - 0.01756308600306511, - 0.02026434615254402, - -0.0013386347563937306, - -0.08551288396120071, - 0.034661442041397095, - 0.0629744604229927, - 0.014522685669362545, - -0.018858879804611206, - 0.02214157022535801, - -0.1596716195344925, - 0.03829749673604965, - 0.0016653232742100954, - 0.04812471196055412, - -0.056686002761125565, - -0.10623833537101746, - 0.0020919914823025465, - -0.016548067331314087, - 0.07570972293615341, - -0.08482731133699417, - 0.013707313686609268, - 
0.05509069934487343, - 0.0012867397163063288, - -0.07505050301551819, - 0.04088008031249046, - 0.026234952732920647, - 0.011179867200553417, - 0.03986078128218651, - -0.042105890810489655, - -0.050777338445186615, - 0.030621202662587166, - -0.06969142705202103, - 0.03848569467663765, - -5.1532378364527176e-08, - 0.10533050447702408, - -0.015051892958581448, - 0.04974827170372009, - 0.0031942161731421947, - -0.050247181206941605, - -0.0004891457501798868, - -0.05504215136170387, - 0.00823890045285225, - 0.04313641041517258, - 0.046289097517728806, - 0.04256119579076767, - -0.0398327000439167, - 0.057825736701488495, - -0.033869046717882156, - -0.022735154256224632, - 0.08741383254528046, - 0.005194360390305519, - -0.09908326715230942, - -0.02987176738679409, - -0.05700455978512764, - -0.04945582523941994, - 0.046763911843299866, - 0.027027789503335953, - -0.025993982329964638, - 0.02325207181274891, - -0.010954228229820728, - -0.00912116002291441, - -0.04941118508577347, - -0.03576076030731201, - -0.002564770868048072, - 0.03850233554840088, - -0.03570830076932907, - -0.07853587716817856, - 0.029881104826927185, - -0.03232889622449875, - 0.0753493383526802, - 0.02964486926794052, - -0.005432994570583105, - 0.0780261754989624, - -0.039111290127038956, - 0.02815544232726097, - 0.05981971323490143, - 0.005577470175921917, - 0.03696584329009056, - 0.02285165525972843, - -0.04101956635713577, - 0.01182562205940485, - -0.03328271210193634, - -0.017237508669495583, - 0.009173452854156494, - 0.008293956518173218, - -0.0002600503503344953, - 0.03159290924668312, - 0.03948486968874931, - -0.012713950127363205, - 0.01546971220523119, - -0.0009075977723114192, - 0.10846075415611267, - -0.037171583622694016, - 0.08666630834341049, - 0.049259256571531296, - -0.013701966032385826, - -0.04806886240839958, - -0.04492267593741417 - ] - }, - { - "type": "NarrativeText", - "element_id": "19fd6c4711db1634658ca1b582bbb282", - "text": "It was in July, 1805, and the speaker was the 
well-known Anna Pavlovna Scherer, maid of honor and favorite of the Empress Marya Fedorovna. With these words she greeted Prince Vasili Kuragin, a man of high rank and importance, who was the first to arrive at her reception. Anna Pavlovna had had a cough for some days. She was, as she said, suffering from la grippe; grippe being then a new word in St. Petersburg, used only by the elite.", - "metadata": { - "data_source": { - "url": "example-docs/book-war-and-peace-1p.txt", - "permissions_data": [ - { - "mode": 33188 - } - ] - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] - }, - "embeddings": [ - -0.05423666909337044, - 0.05444525182247162, - 0.020339185371994972, - 0.08231724053621292, - -0.07275067269802094, - 0.09235042333602905, - 0.06840194761753082, - 0.01329968310892582, - -0.015014543198049068, - -0.05566732957959175, - -0.05068725347518921, - 0.024246947839856148, - -0.014591097831726074, - -0.04864667356014252, - -0.09186425805091858, - 0.013159312307834625, - -0.0007341026212088764, - 0.023640617728233337, - 0.05775585398077965, - 0.07107023894786835, - -0.038492199033498764, - 0.02185862511396408, - 0.019213762134313583, - 0.04542824253439903, - 0.027777135372161865, - 0.011529195122420788, - -0.05850802734494209, - -0.02710614912211895, - 0.033411722630262375, - 0.03780396282672882, - -0.0488094799220562, - -0.029516002163290977, - 0.024384185671806335, - 0.08260192722082138, - -0.01029425673186779, - 0.02442782185971737, - 0.004690755624324083, - 0.042408525943756104, - 0.007938768714666367, - 0.035158876329660416, - -0.014480900950729847, - -0.08043230324983597, - -0.01727035455405712, - -0.039487097412347794, - 0.013404523953795433, - -0.025633588433265686, - -0.08467279374599457, - 0.09215320646762848, - 0.055031176656484604, - 0.00045371733722276986, - 0.004816466011106968, - 0.017839843407273293, - -0.032101403921842575, - -0.04114362597465515, - 0.05560088902711868, - -0.02144431695342064, - 0.021150901913642883, - 
0.002239654306322336, - 0.009305098094046116, - 0.03832445666193962, - -0.027697481215000153, - 0.01734330505132675, - 0.08889874070882797, - 0.051704660058021545, - -0.032718248665332794, - -0.10792195796966553, - 0.06509968638420105, - 0.012057896703481674, - 0.04884810745716095, - 0.07896538823843002, - -0.024069231003522873, - 0.07106569409370422, - 0.007170222233980894, - -0.04288618266582489, - -0.04381052777171135, - -0.019540958106517792, - -0.0006824375013820827, - -0.02388349547982216, - -0.013316062279045582, - 0.012203848920762539, - -0.023478655144572258, - -0.031126663088798523, - -0.03533965349197388, - 0.06731268763542175, - -0.0659903883934021, - -0.05461880564689636, - -0.019474904984235764, - -0.044913217425346375, - 0.026329202577471733, - -0.04147928208112717, - 0.000254144542850554, - -0.049209799617528915, - -0.057092513889074326, - 0.035833392292261124, - -0.016211627051234245, - -0.040548186749219894, - -0.10610909759998322, - 0.008952648378908634, - -0.009497568011283875, - 0.027620891109108925, - 0.007005218882113695, - 0.0415591262280941, - -0.008658169768750668, - 0.027144962921738625, - -0.0368620790541172, - 0.023691609501838684, - 0.001803730265237391, - -0.14327065646648407, - -0.08328871428966522, - 0.018394270911812782, - -0.005625121295452118, - -0.06089230999350548, - 0.0629616305232048, - -0.10043159872293472, - -0.017577869817614555, - 0.11918675899505615, - -0.045187320560216904, - -0.06142745912075043, - 0.013173154555261135, - -0.05008642375469208, - -0.026333438232541084, - -0.08440456539392471, - -0.05948567017912865, - 0.04684463143348694, - -0.02080521173775196, - 0.010918394662439823, - 0.06306123733520508, - -1.8330846021546173e-34, - 0.023663127794861794, - -0.021635068580508232, - 0.09398180991411209, - 0.06558015197515488, - 0.028110843151807785, - -0.028553524985909462, - 0.025134043768048286, - -0.05164061486721039, - -0.021254753693938255, - -0.023560406640172005, - 0.04106174036860466, - -0.03740977123379707, - 
-0.03663378208875656, - -0.13780514895915985, - -0.003202211344614625, - 0.028226152062416077, - -0.13028375804424286, - 0.07633688300848007, - 0.017309347167611122, - 0.07765893638134003, - 0.07021741569042206, - 0.015129990875720978, - -0.032607533037662506, - 0.07045387476682663, - -0.028855236247181892, - 0.038545116782188416, - 0.04149923846125603, - -0.020848482847213745, - 0.05232224240899086, - -0.00822402536869049, - 0.005626605357974768, - 0.013916544616222382, - 0.03284778073430061, - -0.004382647108286619, - -0.015786737203598022, - -0.051043782383203506, - 0.04193837568163872, - -0.06510252505540848, - -0.012801084667444229, - -0.0063155172392725945, - 0.056240204721689224, - -0.060843124985694885, - 0.09915226697921753, - -0.01780637539923191, - -0.06444516032934189, - -0.01345480140298605, - -0.07744936645030975, - -0.019391309469938278, - 0.08984296023845673, - -0.09032352268695831, - -0.04268106073141098, - -0.05032942816615105, - -0.0047477311454713345, - 0.14171960949897766, - -0.019611267372965813, - 0.03920888528227806, - 0.058255840092897415, - 0.04776237905025482, - 0.09023991227149963, - -0.04467959702014923, - -0.05505827069282532, - -0.00784969236701727, - 0.03222418203949928, - -0.012056526727974415, - 0.09152240306138992, - -0.07704490423202515, - -0.039842016994953156, - 0.05128531530499458, - -0.04115530103445053, - 0.007803372573107481, - 0.0424455925822258, - 0.06815174221992493, - -0.0684749037027359, - -0.028895776718854904, - 0.05384017899632454, - 0.03684699535369873, - -0.008438479155302048, - 0.01692717708647251, - -0.03625312075018883, - 0.0157542172819376, - -0.0026881948579102755, - -0.021143700927495956, - -0.003649543970823288, - 0.10435757040977478, - -0.07923296093940735, - -0.06165704131126404, - 0.013717948459088802, - 0.004878684878349304, - 0.007263530977070332, - 0.06831801682710648, - -0.0740785300731659, - 0.0624053068459034, - 0.05403151735663414, - -0.03127589076757431, - -0.1286262422800064, - 
-2.122634046646968e-33, - 0.0013190907193347812, - 0.029269885271787643, - 0.02589591033756733, - 0.1299196183681488, - 0.024839190766215324, - 0.019481273368000984, - -0.061224259436130524, - 0.056131333112716675, - -0.07899627834558487, - -0.05418187007308006, - 0.042069029062986374, - -0.027046620845794678, - 0.03789370134472847, - 0.03565771132707596, - 0.011533146724104881, - 0.06362222880125046, - 0.032901011407375336, - 0.06732574850320816, - -0.02241230569779873, - 0.028121424838900566, - -0.010809613391757011, - 0.007977132685482502, - -0.023789724335074425, - -0.015545609407126904, - 0.012052388861775398, - -0.00030492295627482235, - 0.13803179562091827, - -0.04963701218366623, - -0.1383620649576187, - 0.03576039895415306, - 0.028368273749947548, - 0.018479131162166595, - -0.11063777655363083, - 0.05067219212651253, - 0.014886987395584583, - -0.003695678198710084, - 0.04480915144085884, - 0.02217528596520424, - 0.019430870190262794, - 0.004540923982858658, - -0.002111382083967328, - -0.005595901980996132, - 0.01563967764377594, - 0.04120780527591705, - 0.08500000834465027, - -0.04632546380162239, - -0.04556257277727127, - -0.013389864936470985, - 0.08864811807870865, - 0.024622580036520958, - -0.058714359998703, - 0.013165381737053394, - -0.008856123313307762, - 0.026293983682990074, - 0.004199490416795015, - 0.013887199573218822, - -0.012397672049701214, - -0.0915493369102478, - 0.07278919965028763, - 0.0064527601934969425, - -0.02172330766916275, - -0.019711917266249657, - -0.10501730442047119, - -0.12632639706134796, - -0.011150195263326168, - -0.023831967264413834, - -0.01591426692903042, - 0.062429752200841904, - 0.004688825458288193, - -0.05650950223207474, - 0.004797067027539015, - 0.002453196793794632, - 0.03858102113008499, - 0.12522628903388977, - 0.04028629511594772, - 0.001992786768823862, - 0.016523076221346855, - -0.03520101681351662, - 0.013053600676357746, - -0.01452006958425045, - -0.02713269367814064, - -0.009017275646328926, - 
-0.0139585230499506, - -0.07057645916938782, - 0.03341757878661156, - 0.06892048567533493, - -0.007473575882613659, - -0.05211004242300987, - -0.019589431583881378, - -0.007827995344996452, - -0.02149311825633049, - 0.003227331442758441, - 0.04853278398513794, - -0.09270540624856949, - -0.07592538744211197, - -4.300748202012983e-08, - -0.0045827943831682205, - 0.03556409478187561, - -0.00024055397079791874, - -0.07003432512283325, - 0.02512563392519951, - -0.11407536268234253, - -0.011003656312823296, - -0.07727199792861938, - -0.036746688187122345, - -0.023768587037920952, - -0.07787065207958221, - -0.02629818022251129, - 0.07306750118732452, - -0.02506394125521183, - 0.10978508740663528, - -0.027902867645025253, - 0.020963935181498528, - 0.004359292797744274, - -0.06021265313029289, - -0.07803105562925339, - 0.06699907034635544, - -0.024545859545469284, - 0.02576916106045246, - -0.08864094316959381, - -0.05483921617269516, - 0.06868870556354523, - 0.018014874309301376, - -0.0478951595723629, - 0.046625781804323196, - -0.023343771696090698, - -0.03775409236550331, - 0.08953213691711426, - -0.08868102729320526, - -0.023651650175452232, - 0.018190674483776093, - 0.11000266671180725, - -0.013920482248067856, - -0.08008894324302673, - 0.05862164497375488, - -0.017713695764541626, - 0.0061033922247588634, - 0.018481381237506866, - 0.0069978851824998856, - -0.025797255337238312, - 0.011212282814085484, - 0.08030984550714493, - 0.0012547350488603115, - -0.06218872591853142, - 0.01369908731430769, - 0.04872714355587959, - -0.0191862341016531, - 0.06085670739412308, - 0.026860378682613373, - 0.07354812324047089, - -0.030469680204987526, - -0.0058739036321640015, - 0.047359317541122437, - -0.007555088493973017, - 0.03952139988541603, - -0.029159000143408775, - -0.030523788183927536, - -0.0025960360653698444, - -0.04840218275785446, - 0.0614558607339859 - ] - }, - { - "type": "NarrativeText", - "element_id": "9a29e46f349e568cbec4c13d6ac6d3e1", - "text": "All her invitations 
without exception, written in French, and delivered by a scarlet-liveried footman that morning, ran as follows:", - "metadata": { - "data_source": { - "url": "example-docs/book-war-and-peace-1p.txt", - "permissions_data": [ - { - "mode": 33188 - } - ] - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] - }, - "embeddings": [ - 0.0023595455568283796, - 0.04725555330514908, - 0.07864277064800262, - 0.02816121280193329, - 0.052525974810123444, - 0.048932209610939026, - -0.038659512996673584, - -0.04315819963812828, - -0.03542991355061531, - 0.04965610429644585, - -0.012053768150508404, - 0.016365492716431618, - 0.013249953277409077, - -0.07936280965805054, - -0.08082810789346695, - 0.01530385110527277, - -0.004346815403550863, - -0.05749056488275528, - 0.020369477570056915, - 0.070593923330307, - -0.028021935373544693, - 0.02481032721698284, - 0.13321219384670258, - 0.06980545073747635, - -0.018942737951874733, - -0.060627490282058716, - -0.013086947612464428, - 0.007532241754233837, - 0.004013243597000837, - -0.009438945911824703, - -0.042332105338573456, - 0.0913439616560936, - 0.015961814671754837, - 0.03689055144786835, - 0.08075912296772003, - -0.0018513035029172897, - 0.09394270181655884, - -0.002426315099000931, - -0.04186232388019562, - 0.046128831803798676, - -0.023436421528458595, - -0.09251978993415833, - -0.030521539971232414, - 0.044763803482055664, - -0.04440255090594292, - -0.03522655367851257, - -0.02618449367582798, - 0.02913331426680088, - -0.08911265432834625, - 0.006474820431321859, - 0.02180786244571209, - 0.052435554563999176, - -0.08674846589565277, - -0.05736842006444931, - 0.012563902884721756, - 0.001919214497320354, - 0.01962612196803093, - -0.020220084115862846, - 0.04051610082387924, - -0.0016887761885300279, - -0.004476823378354311, - 0.04316245764493942, - -0.0706145316362381, - 0.014945807866752148, - -0.024342592805624008, - -0.07555877417325974, - -0.024880604818463326, - 0.05288991332054138, - -0.014725962653756142, - 
0.0817783772945404, - 0.04086130112409592, - 0.02597678080201149, - -0.000799003173597157, - 0.04838088899850845, - -0.016430716961622238, - 0.030447600409388542, - 0.021219568327069283, - -0.028680941089987755, - 0.005886939819902182, - 0.0349339060485363, - -0.12274086475372314, - -0.10635420680046082, - 0.03752436116337776, - 0.06280919164419174, - 0.048371899873018265, - -0.005929640494287014, - -0.011176642030477524, - -0.00685890344902873, - 0.07332990318536758, - 0.010608868673443794, - -0.14890246093273163, - -0.05217859521508217, - 0.031723301857709885, - 0.008842121809720993, - -0.03602171689271927, - -0.0007704166928306222, - 0.00022353281383402646, - 0.014227169565856457, - -0.024083180353045464, - 0.03981215879321098, - 0.019911523908376694, - 0.094389908015728, - 0.00202686688862741, - 0.04820345714688301, - -0.013482271693646908, - -0.07065846771001816, - 0.014181003905832767, - -0.058766625821590424, - 0.003275791648775339, - -0.0436919704079628, - 0.03385256603360176, - -0.04383928328752518, - 0.06243089586496353, - -0.029873071238398552, - -0.013840588741004467, - 0.0700821503996849, - -0.035606566816568375, - -0.06703662127256393, - 0.029444878920912743, - 0.05646829307079315, - -0.03390173986554146, - 0.07127062231302261, - 0.01649673469364643, - -0.026936156675219536, - -0.07139121741056442, - -0.07321657985448837, - 0.07671329379081726, - -4.4568104240098996e-33, - 0.023896176367998123, - 0.09456682950258255, - 7.626810111105442e-05, - 0.002071917522698641, - 0.10064510256052017, - 0.07494156062602997, - -0.02089782990515232, - 0.015334729105234146, - 0.030347280204296112, - -0.00583969010040164, - -0.022854648530483246, - -0.03741898387670517, - 0.03622421249747276, - 0.045642267912626266, - -0.10225758701562881, - 0.05357274413108826, - 0.09479650110006332, - 0.06655430048704147, - 0.008523096330463886, - 0.023905707523226738, - 0.1307642161846161, - 0.0048844777047634125, - 0.009571907110512257, - 0.004908599890768528, - 
0.008563571609556675, - 0.03072134219110012, - 0.03431536257266998, - 0.06559020280838013, - 0.038170311599969864, - 0.03405507653951645, - -0.01002222765237093, - -0.057840440422296524, - 0.08795300871133804, - -0.0179357398301363, - 0.04056176915764809, - -0.010452114045619965, - -0.0043557570315897465, - -0.07645674049854279, - 0.01745658740401268, - 0.029208818450570107, - -0.03892460837960243, - -0.037331465631723404, - 0.03961649164557457, - -0.033404458314180374, - -0.14091329276561737, - 0.05262598767876625, - -0.022296374663710594, - -0.04128558933734894, - 0.10736940056085587, - 0.021923154592514038, - -0.07073382288217545, - 0.025094589218497276, - 0.03249844163656235, - 0.04170314222574234, - 0.02152794599533081, - -0.007863281294703484, - 0.004558955784887075, - -0.04750956594944, - 0.016495583578944206, - -0.08661268651485443, - 0.08075809478759766, - 0.04040287807583809, - -0.05833827331662178, - 0.013957317918539047, - 0.03553757816553116, - -0.06154719740152359, - -0.02872048318386078, - -0.106046661734581, - 0.022067101672291756, - -0.11240128427743912, - -0.049365490674972534, - 0.009042900986969471, - 0.03821280598640442, - 0.01027524471282959, - -0.026160309091210365, - 0.05564219877123833, - 0.061347443610429764, - -0.03146400675177574, - 0.04470422491431236, - -0.09301458299160004, - -0.04586620628833771, - -0.029080376029014587, - -0.05353635549545288, - 0.021124161779880524, - 0.0137900086119771, - 0.016992295160889626, - 0.04274428263306618, - -0.04939054697751999, - -0.014555824920535088, - 0.03152505308389664, - 0.02878335304558277, - -0.014487143605947495, - -0.026664717122912407, - -0.09144086390733719, - -0.054154396057128906, - 1.5841569822665712e-33, - 0.020983485504984856, - 0.05347200110554695, - -0.0666104182600975, - 0.0009038331336341798, - 0.03180099278688431, - 0.023325085639953613, - -0.036106470972299576, - 0.02951556071639061, - 0.1761523336172104, - -0.06718979775905609, - -0.02546771429479122, - -0.08531615883111954, - 
0.040496375411748886, - -0.018054232001304626, - -0.04524073749780655, - -0.042808279395103455, - 0.10139471292495728, - 0.029113274067640305, - 0.06066914647817612, - 0.005392052233219147, - -0.06589948385953903, - 0.00784360896795988, - 0.018318844959139824, - -0.06844127178192139, - 0.06228255480527878, - 0.006715207826346159, - 0.15284967422485352, - -0.020420784130692482, - -0.15761911869049072, - 0.007363112177699804, - 0.03968852013349533, - -0.017503680661320686, - -0.0853145644068718, - 0.01812361180782318, - -0.01632780209183693, - 0.009068229235708714, - -0.055808912962675095, - 0.09528883546590805, - 0.04409671202301979, - 0.03809197247028351, - 0.034919656813144684, - -0.07533687353134155, - 0.0045872218906879425, - 0.0657544806599617, - 0.05714783817529678, - -0.02794371172785759, - -0.12656232714653015, - -0.03589311242103577, - 0.035945288836956024, - 0.039329614490270615, - -0.08188362419605255, - -0.0044206236489117146, - 0.011425508186221123, - 0.05608394369482994, - -0.008442601189017296, - -0.04040392488241196, - -0.02630300633609295, - -0.05984063446521759, - 0.042021896690130234, - -0.00045357091585174203, - 0.01906394585967064, - 0.03440665453672409, - -0.05627536401152611, - -0.05575418472290039, - 0.003195107914507389, - -0.036631450057029724, - -0.053801536560058594, - -0.02139955386519432, - -0.00240793963894248, - -0.020292164757847786, - -0.0038107612635940313, - -0.03221882879734039, - -0.061760179698467255, - 0.12360666692256927, - 0.00804359931498766, - 0.03380918130278587, - -0.011053086258471012, - -0.05986084043979645, - 0.03416253998875618, - -0.047253333032131195, - -0.04168696701526642, - -0.04694301635026932, - -0.0260507483035326, - -0.02609088458120823, - 0.04234696179628372, - -0.041649818420410156, - 0.006260092370212078, - 0.005315867718309164, - 0.03272666782140732, - 0.04735765978693962, - 0.08116106688976288, - 0.024832293391227722, - 0.056733787059783936, - -0.008381984196603298, - -0.012605300173163414, - 
-2.6700321953398998e-08, - 0.0173638928681612, - 0.020590703934431076, - -0.08382881432771683, - -0.04974590241909027, - 0.06646018475294113, - -0.05172014981508255, - 0.03654056787490845, - -0.09228931367397308, - 0.03999173641204834, - -0.02021026983857155, - -0.04262700304389, - 0.05382784828543663, - 0.004780335817486048, - -0.0806218832731247, - 0.01284059789031744, - 0.003851325483992696, - 0.04451688751578331, - -0.028158845379948616, - -0.0896618664264679, - -0.013239150866866112, - 0.04167158529162407, - 0.03307757154107094, - 0.017056236043572426, - -0.1276015192270279, - -0.047713592648506165, - 0.020504266023635864, - -0.03143443539738655, - -0.05803839489817619, - -0.05736900120973587, - 0.05310548096895218, - -0.022472675889730453, - 0.05122159421443939, - 0.05061117932200432, - -0.05920892581343651, - -0.07698393613100052, - 0.11378925293684006, - 0.011539570055902004, - 0.022280462086200714, - 0.01308724656701088, - 0.004714048467576504, - 0.07125482708215714, - -0.045311592519283295, - -0.02944216877222061, - -0.02851048670709133, - 0.022433875128626823, - -0.022116653621196747, - 8.010292367544025e-05, - -0.08019819110631943, - 0.011157521978020668, - 0.02311084233224392, - -0.03485306352376938, - 0.019541818648576736, - 0.04963162913918495, - 0.008129259571433067, - -0.02148033119738102, - -0.0024885712191462517, - 0.014285121113061905, - -0.01857011392712593, - 0.08718403428792953, - 0.017262062057852745, - -0.034702301025390625, - -0.014993852935731411, - 0.002555765910074115, - -0.04470689967274666 - ] - }, - { - "type": "NarrativeText", - "element_id": "e37f7a0354c187111dbbd76c353a8548", - "text": "\"If you have nothing better to do, Count (or Prince), and if the prospect of spending an evening with a poor invalid is not too terrible, I shall be very charmed to see you tonight between 7 and 10--Annette Scherer.\"", - "metadata": { - "data_source": { - "url": "example-docs/book-war-and-peace-1p.txt", - "permissions_data": [ - { - "mode": 33188 
- } - ] - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] - }, - "embeddings": [ - -0.0486200675368309, - 0.03687994182109833, - 0.08913001418113708, - 0.02740292251110077, - -0.010071254335343838, - 0.10595865547657013, - 0.06342905759811401, - -0.006530117709189653, - -0.013889468275010586, - -0.08802184462547302, - -0.07767901569604874, - 0.03545073792338371, - -0.028766755014657974, - -0.07743862271308899, - -0.04468626156449318, - -0.003775473218411207, - 0.039629947394132614, - 0.009885578416287899, - 0.01979304850101471, - 0.021323954686522484, - -0.0026755575090646744, - -0.024544110521674156, - 0.005611400585621595, - 0.05783711373806, - -0.028035499155521393, - -0.03807012364268303, - -0.00041047052945941687, - -0.043670617043972015, - -0.05767305567860603, - 0.06916483491659164, - -0.019221559166908264, - 0.037834297865629196, - -0.05280989781022072, - 0.0457116961479187, - 0.028409214690327644, - -0.008392547257244587, - -0.03478812798857689, - 0.009684872813522816, - 0.04222657158970833, - 0.06265994906425476, - 0.046711426228284836, - -0.07935521751642227, - -0.05677919089794159, - 0.006064497400075197, - -0.0704653188586235, - -0.05251940339803696, - 0.03405332565307617, - -0.040065523236989975, - 0.02645055577158928, - -0.014553762972354889, - -0.08697935193777084, - 0.03064056485891342, - -0.08134804666042328, - -0.11330573260784149, - 0.03267943486571312, - 0.07518132030963898, - 0.021188270300626755, - -0.01012127660214901, - 0.07134698331356049, - 0.03555463254451752, - 0.015815041959285736, - 0.012798475101590157, - 0.07203026860952377, - 0.05252337083220482, - -0.013321950100362301, - 0.019010037183761597, - -0.044231314212083817, - 0.0501399040222168, - -0.13513486087322235, - 0.08778794854879379, - -0.06448495388031006, - 0.002710360335186124, - -0.006125655025243759, - -0.009832935407757759, - -0.08841817826032639, - -0.03373432159423828, - -0.029915373772382736, - -0.07407315820455551, - 0.012009776197373867, - 
0.015418953262269497, - -0.08306627720594406, - -0.1068945899605751, - -0.06608618050813675, - -0.03254545480012894, - 0.017948443070054054, - -0.03686046972870827, - 0.07100756466388702, - -0.06899384409189224, - 0.022355545312166214, - 0.01030892413109541, - -0.025783095508813858, - 0.0421191081404686, - -0.0319056399166584, - 0.04253570735454559, - -0.02041158452630043, - 0.03507412225008011, - 0.022603202611207962, - -0.02040170133113861, - -0.07323509454727173, - 0.11387623101472855, - 0.008910994976758957, - 0.08846544474363327, - 0.017315221950411797, - 0.038065697997808456, - -0.029453326016664505, - -0.015634004026651382, - 0.009224776178598404, - -0.016234414651989937, - -0.016330869868397713, - -0.04198377579450607, - 0.008446373045444489, - 0.007598000578582287, - 0.10469182580709457, - -0.03243805468082428, - 0.026653122156858444, - 0.004351389594376087, - 0.06812882423400879, - -0.03050711378455162, - 0.06144854053854942, - 0.010282271541655064, - 0.07976134121417999, - 0.0043565197847783566, - 0.03692830353975296, - 0.06752533465623856, - -0.05194715037941933, - -0.020909508690238, - 0.06060613691806793, - -3.4575398496136546e-34, - 0.06664282083511353, - -0.013305791653692722, - 0.1437346190214157, - 0.055512797087430954, - 0.029381390661001205, - -0.022709239274263382, - -0.03923968970775604, - 0.008861650712788105, - 0.04451305791735649, - -0.012673668563365936, - 0.006724156904965639, - -0.10200201719999313, - 0.015488673001527786, - -0.10154028236865997, - 0.005830927286297083, - 0.05349376052618027, - -0.030349301174283028, - -0.024011710658669472, - 0.0867428407073021, - -0.02435683272778988, - -0.02954503335058689, - -0.08889515697956085, - -0.04400339350104332, - -0.018109597265720367, - -0.10312674939632416, - -0.041869960725307465, - 0.04156764969229698, - -0.01929757371544838, - 0.0648110881447792, - 0.009118318557739258, - -0.0038376441225409508, - 0.05363079532980919, - 0.06863442808389664, - 0.004525018390268087, - 
0.008223247714340687, - -0.060197990387678146, - -0.06310275942087173, - -0.015021800994873047, - -0.013856987468898296, - -0.024735422804951668, - -0.02569848857820034, - -0.0315413773059845, - 0.12786003947257996, - 0.0011884717969223857, - -0.00757340295240283, - 0.043278712779283524, - 0.014143950305879116, - 0.042446646839380264, - 0.003966683521866798, - -0.006093714851886034, - -0.03061152622103691, - -0.036428604274988174, - 0.07054053992033005, - 0.04291774705052376, - 0.01018885150551796, - -0.053736407309770584, - -0.0395071916282177, - 0.005340927746146917, - -0.02231704257428646, - -0.05836336687207222, - 0.0663379356265068, - -0.08509992808103561, - 0.025149287655949593, - -0.06464405357837677, - 0.0560990609228611, - 0.01679554581642151, - -0.0030728273559361696, - -0.02426706813275814, - -0.00023274740669876337, - -0.056554846465587616, - -0.0025430337991565466, - 0.07918895781040192, - -0.05696763098239899, - -0.0072678509168326855, - -0.02480824664235115, - 0.04040495306253433, - 0.07221049070358276, - 0.0024300136137753725, - 0.015402665361762047, - -0.0687580481171608, - 0.06608901917934418, - 0.0035700497683137655, - -0.06248387321829796, - 0.053245991468429565, - 0.09190328419208527, - 0.0009737921645864844, - -0.003137845080345869, - -0.12396562844514847, - -0.014093367382884026, - 0.09479106217622757, - 0.02556724287569523, - -0.06639508157968521, - -0.02546246536076069, - -0.0506269633769989, - -0.1386348009109497, - -3.6529609640743116e-34, - -0.01269697304815054, - 0.008826217614114285, - 0.061091262847185135, - 0.10967041552066803, - 0.027269065380096436, - -0.043446287512779236, - -0.0058680265210568905, - 0.02325800247490406, - 0.052677132189273834, - -0.03071192465722561, - -0.05221113562583923, - -0.03368789330124855, - 0.13911472260951996, - -0.05078120529651642, - 0.015582340769469738, - -0.03320647031068802, - 0.018242647871375084, - -0.0724089965224266, - -0.01647118106484413, - -0.009546183049678802, - 0.04724115505814552, - 
0.016156544908881187, - -0.030115414410829544, - -0.03000267781317234, - -0.00028491480043157935, - 0.06315216422080994, - 0.018432103097438812, - -0.01028120331466198, - -0.08467312902212143, - -0.025300946086645126, - 0.12236674129962921, - 0.0045727314427495, - -0.0583915114402771, - 0.08561067283153534, - -0.024420224130153656, - 0.04175036400556564, - 0.06704740971326828, - -0.07799742370843887, - -0.08636744320392609, - -0.006747859064489603, - -0.04664286598563194, - -0.04609360545873642, - -0.04156402871012688, - -0.001496547949500382, - -0.0012599691981449723, - -0.012154355645179749, - -0.011674038134515285, - 0.024814989417791367, - 0.006773214787244797, - 0.029428301379084587, - -0.01722320169210434, - 0.020293841138482094, - -0.12813469767570496, - 0.11289172619581223, - -0.035526614636182785, - 0.0033505423925817013, - 0.01739603839814663, - -0.001771652838215232, - 0.07063547521829605, - 0.015264692716300488, - -0.07010583579540253, - 0.02117868699133396, - -0.05190863087773323, - 0.017682254314422607, - 0.016347073018550873, - -0.056617844849824905, - -0.011006173677742481, - 0.08239301294088364, - -0.017991861328482628, - -0.0028607642743736506, - 0.03403075039386749, - -0.03447555750608444, - -0.07497278600931168, - 0.017877496778964996, - 0.024901997298002243, - 0.001125121139921248, - 0.048149701207876205, - -0.011577627621591091, - -0.05788549780845642, - 0.03651383891701698, - -0.01419283077120781, - -0.043192308396101, - -0.02673066034913063, - -0.011179531924426556, - 0.006381206680089235, - -0.013305948115885258, - 0.07299388200044632, - -0.02967526949942112, - -0.019634000957012177, - 0.026101969182491302, - 0.03237967938184738, - -0.00499308155849576, - 0.05153448507189751, - -0.08542998135089874, - 0.014721574261784554, - -3.338955423259904e-08, - 0.04488585889339447, - -0.047148074954748154, - -0.0640324205160141, - -0.13860781490802765, - 0.03632422164082527, - -0.03801966831088066, - 0.024571413174271584, - 0.00878355372697115, - 
-0.07595612108707428, - 0.023607945069670677, - 0.10317740589380264, - -0.0855007916688919, - 0.06889460980892181, - -0.00648864870890975, - 0.08344216644763947, - 0.030783597379922867, - 0.07933887094259262, - -0.054010119289159775, - -0.022299563512206078, - -0.012930402532219887, - 0.06527604162693024, - 0.003328177845105529, - 0.06217062473297119, - -0.028922075405716896, - -0.008504550904035568, - 0.04795001447200775, - 0.02141592651605606, - 0.0014492090558633208, - -0.049570925533771515, - 0.027479173615574837, - 0.09821037948131561, - 0.031262218952178955, - 0.01765122078359127, - -0.0004345061315689236, - -0.011527259834110737, - 0.04417387396097183, - -0.03349457308650017, - -0.03799552470445633, - 0.02151508443057537, - -0.023998308926820755, - -0.041356947273015976, - 0.03809981420636177, - -0.042862650007009506, - 0.0785767212510109, - 0.039057012647390366, - -0.0793391615152359, - 0.013255830854177475, - 0.0179001297801733, - -0.03965906798839569, - -0.040104616433382034, - -0.00035185160231776536, - -0.001970064826309681, - 0.02031131274998188, - -0.023235710337758064, - -0.04101382941007614, - -0.042076047509908676, - 0.03147187829017639, - 0.05311312898993492, - -0.06019068509340286, - 0.006541467271745205, - 0.14579345285892487, - 0.08224163949489594, - -0.07540097087621689, - -0.07972784340381622 - ] - }, - { - "type": "NarrativeText", - "element_id": "ff53e9e3e2c0a40c878f3117dfaee12e", - "text": "\"Heavens! what a virulent attack!\" replied the prince, not in the least disconcerted by this reception. He had just entered, wearing an embroidered court uniform, knee breeches, and shoes, and had stars on his breast and a serene expression on his flat face. He spoke in that refined French in which our grandfathers not only spoke but thought, and with the gentle, patronizing intonation natural to a man of importance who had grown old in society and at court. 
He went up to Anna Pavlovna, kissed her hand, presenting to her his bald, scented, and shining head, and complacently seated himself on the sofa.", - "metadata": { - "data_source": { - "url": "example-docs/book-war-and-peace-1p.txt", - "permissions_data": [ - { - "mode": 33188 - } - ] - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] - }, - "embeddings": [ - -0.01821090839803219, - 0.14189428091049194, - 0.06296979635953903, - 0.021435990929603577, - -0.0342990979552269, - 0.0876535102725029, - 0.09852342307567596, - -0.046761978417634964, - -0.032796505838632584, - -0.05792921781539917, - -0.04649097099900246, - -0.023917632177472115, - 0.004826297052204609, - -0.06528623402118683, - 0.013964676298201084, - -0.00030672684079036117, - -0.017572997137904167, - 0.04033080115914345, - 0.0012798308162018657, - 0.11687067151069641, - -0.02767736278474331, - 0.06069065257906914, - 0.004045278765261173, - 0.0030521780718117952, - -0.046026796102523804, - 0.009411415085196495, - 0.028842885047197342, - -0.02954116091132164, - -0.014768589287996292, - 0.018804417923092842, - -0.018149767071008682, - -0.0018351237522438169, - -0.005772426724433899, - 0.0782475546002388, - -0.09304408729076385, - -0.00396999716758728, - -0.02550867199897766, - -0.032713666558265686, - 0.027754001319408417, - -0.007668178062886, - -0.012167312204837799, - -0.042259570211172104, - -0.022764043882489204, - 0.07541150599718094, - 0.03192088007926941, - -0.019112829118967056, - -0.0809827372431755, - -0.024734778329730034, - 0.04502086341381073, - -0.01547539047896862, - -0.09272018820047379, - 0.033542267978191376, - 0.03152655437588692, - -0.07030358910560608, - -0.049778860062360764, - 0.02008306048810482, - 0.03791830316185951, - 0.0015579734463244677, - 0.040449853986501694, - 0.07563000917434692, - 0.0017830145079642534, - 0.050422631204128265, - 0.10320530831813812, - 0.04705667123198509, - -0.047684572637081146, - -0.10446310043334961, - 0.05923539027571678, - 
0.020012550055980682, - -0.061900753527879715, - 0.10213825106620789, - 0.042254459112882614, - -0.006701549980789423, - -0.012615542858839035, - -0.11128807812929153, - -0.04897299408912659, - -0.011014256626367569, - 0.00625221012160182, - -0.1231180801987648, - -0.011414057575166225, - 0.015304229222238064, - -0.004650887101888657, - -0.06636469811201096, - -0.07129023969173431, - 0.06300665438175201, - -0.04842934012413025, - -0.09559126943349838, - 0.049481164664030075, - -0.0808960422873497, - -0.041871607303619385, - 0.025684673339128494, - 0.004877278581261635, - -0.021023448556661606, - -0.06338407844305038, - 0.041496120393276215, - 0.026929443702101707, - 0.03656552731990814, - 0.015449454076588154, - -0.048995450139045715, - -0.06652054935693741, - 0.050739482045173645, - 0.05162039026618004, - 0.051657795906066895, - -0.02575691230595112, - 0.027645209804177284, - -0.09982188791036606, - 0.02371579222381115, - -0.026078924536705017, - -0.08350101113319397, - -0.058907654136419296, - -0.010147891007363796, - -0.034378532320261, - -0.11381184309720993, - 0.011026861146092415, - -0.11859527230262756, - -0.022935006767511368, - 0.04787220060825348, - 0.05839496850967407, - -0.03827362135052681, - -0.00955091044306755, - -0.014362015761435032, - 0.0634639710187912, - 0.02453756146132946, - -0.03344390168786049, - 0.11250727623701096, - -0.06982152163982391, - -0.03536441549658775, - 0.004629191011190414, - 1.747000786079875e-33, - 0.03947208821773529, - 0.040333155542612076, - 0.05003226175904274, - 0.0627870038151741, - 0.049182530492544174, - 0.0378626324236393, - -0.002265345072373748, - -0.00036693402216769755, - -0.039146095514297485, - -0.035602301359176636, - -0.014580335468053818, - -0.03102882020175457, - 0.0023757985327392817, - -0.0288905780762434, - -0.10088586807250977, - 0.11166083067655563, - -0.06596998870372772, - -0.00985047873109579, - 0.06851841509342194, - 0.022369928658008575, - 0.007712005637586117, - 0.07309042662382126, - 
-0.029763471335172653, - 0.07449771463871002, - -0.04152587056159973, - 0.023668093606829643, - 0.0407828688621521, - -0.007492535747587681, - -0.02654390223324299, - 0.010241927579045296, - -0.049055490642786026, - 0.015456822700798512, - 0.054784275591373444, - 0.007424837443977594, - 0.0255171786993742, - -0.008687596768140793, - -0.042407531291246414, - -0.0480441078543663, - -0.07281584292650223, - 0.03303361311554909, - 0.0033997197169810534, - -0.0023227299097925425, - -0.0072356825694441795, - -0.012058882974088192, - -0.1584135740995407, - 0.04674842953681946, - -0.05691581591963768, - 0.002102371072396636, - -0.004185475409030914, - -0.07694176584482193, - -0.021312396973371506, - 0.007585792802274227, - -0.0069068120792508125, - 0.031397804617881775, - -0.023958317935466766, - -0.016157573089003563, - 0.004572156351059675, - 0.04501873999834061, - 0.06532137095928192, - -0.03778347373008728, - 0.10829176753759384, - -0.03116549551486969, - 0.06857903301715851, - -0.0019906088709831238, - -0.020024143159389496, - -0.12234477698802948, - -0.07756857573986053, - 0.05220640078186989, - -0.0482671782374382, - -0.006218504160642624, - -0.03172220289707184, - 0.10647480189800262, - -0.0043113320134580135, - -0.020344864577054977, - -0.028779413551092148, - 0.010282287374138832, - -0.08016345649957657, - 0.0432865247130394, - -0.0010577370412647724, - -0.06618637591600418, - 0.038982994854450226, - 0.0017452012980356812, - 0.022284550592303276, - 0.025656763464212418, - 0.016390766948461533, - -0.06552041321992874, - -0.03440457209944725, - -0.05710837244987488, - -0.03641461580991745, - 0.13124778866767883, - -0.015124861150979996, - 0.04237806051969528, - 0.07488368451595306, - -0.10918773710727692, - -0.08908736705780029, - -5.8269739861680506e-33, - -0.020827768370509148, - -0.003087892895564437, - -0.08207239955663681, - 0.09733551740646362, - -0.008875329047441483, - -0.040471259504556656, - -0.07314587384462357, - 0.07006904482841492, - 
-0.004497144371271133, - -0.013727361336350441, - 0.033719517290592194, - -0.0396072082221508, - 0.09562670439481735, - -0.08862386643886566, - -0.017600253224372864, - -0.0341934859752655, - 0.10651562362909317, - 0.05324796214699745, - -0.03143731877207756, - 0.06551412492990494, - 0.06156589463353157, - -0.038812994956970215, - 0.009870616719126701, - -0.028915343806147575, - -0.011992327868938446, - 0.009035686030983925, - 0.0987909734249115, - -0.007158730179071426, - -0.11763094365596771, - 0.03427322953939438, - 0.03440479561686516, - 0.011127431876957417, - -0.057399358600378036, - 0.06064264848828316, - 0.0061752162873744965, - 0.09144444018602371, - 0.046914082020521164, - -0.033194951713085175, - 0.010019451379776001, - -0.0070080203004181385, - -0.003974899649620056, - -0.05783272162079811, - 0.018345599994063377, - 0.06880046427249908, - 0.06445208936929703, - -0.017780104652047157, - -0.021879544481635094, - 0.03970015421509743, - 0.05623394623398781, - -0.023330431431531906, - -0.04692263901233673, - 0.015075696632266045, - -0.03732207044959068, - 0.013419993221759796, - -0.030217895284295082, - -0.026605604216456413, - 0.00047731504309922457, - -0.04241754487156868, - 0.08805425465106964, - 0.006248066667467356, - -0.032238107174634933, - -0.02360597811639309, - -0.07872772216796875, - -0.05528230592608452, - -0.02677951008081436, - 0.07355596870183945, - 0.01595303975045681, - 0.0449473112821579, - 0.07241595536470413, - -0.040916807949543, - 0.04634459316730499, - 0.014152943156659603, - -0.003151148557662964, - 0.086457259953022, - 0.002669429872184992, - 0.04503464698791504, - 0.04822568595409393, - -0.07435241341590881, - 0.004517799708992243, - -0.02170669101178646, - -0.00022751234064344317, - -0.08234338462352753, - -0.03115135431289673, - -0.05293332785367966, - 0.062102362513542175, - -0.0447835735976696, - 0.0011613268870860338, - -0.013380859047174454, - -0.08765747398138046, - -0.01565648429095745, - -0.024476435035467148, - 
0.006487371399998665, - -0.01591990701854229, - -0.09806496649980545, - 0.02603819966316223, - -5.7562669297794855e-08, - -0.047583162784576416, - -0.04328448697924614, - -0.05637136474251747, - -0.05769981071352959, - -6.81738747516647e-05, - -0.004203977063298225, - -0.004778998903930187, - -0.027043092995882034, - 0.01357234176248312, - 0.006340992171317339, - -0.0012254678877070546, - -0.005992580205202103, - 0.0448201559484005, - -0.02413370832800865, - 0.07855549454689026, - 0.008333065547049046, - -0.022249801084399223, - -0.011054125614464283, - -0.04943384602665901, - 0.017508840188384056, - 0.03386348858475685, - 0.027341939508914948, - -0.011000316590070724, - -0.05664633959531784, - -0.034833863377571106, - 0.005601534154266119, - 0.01789466105401516, - -0.03889448568224907, - -0.03091748245060444, - -0.048188988119363785, - 0.04293283820152283, - 0.02030123583972454, - -0.10848956555128098, - -0.050146300345659256, - -0.05427027493715286, - 0.0692223310470581, - 0.08041663467884064, - -0.07030843943357468, - 0.16752898693084717, - -0.07572435587644577, - -0.029638754203915596, - -0.007868490181863308, - -0.06977297365665436, - 0.006647306494414806, - 0.0333944708108902, - -0.007479228079319, - 0.030018866062164307, - 0.004089192021638155, - 0.012770382687449455, - 0.053249821066856384, - 0.003969372250139713, - 0.029761813580989838, - 0.010942214168608189, - 0.047804854810237885, - 0.006235899403691292, - -0.033541541546583176, - 0.06135937571525574, - 0.04686572775244713, - -0.05125612020492554, - 0.03666789457201958, - 0.061454709619283676, - 0.017497118562459946, - -0.027414871379733086, - -0.017388004809617996 - ] - }, - { - "type": "NarrativeText", - "element_id": "944f331f0e9f276612e232b36f28b5f6", - "text": "\"First of all, dear friend, tell me how you are. 
Set your friend's mind at rest,\" said he without altering his tone, beneath the politeness and affected sympathy of which indifference and even irony could be discerned.", - "metadata": { - "data_source": { - "url": "example-docs/book-war-and-peace-1p.txt", - "permissions_data": [ - { - "mode": 33188 - } - ] - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] - }, - "embeddings": [ - 0.01609872654080391, - 0.12206722050905228, - 0.10550273954868317, - -0.0030169193632900715, - -0.009118711575865746, - -0.02610721066594124, - 0.11645602434873581, - 0.0013343243626877666, - -0.06382516771554947, - -0.07452850043773651, - -0.06803740561008453, - 0.05932634696364403, - -0.02155320905148983, - -0.014978386461734772, - -0.0011992445215582848, - 0.040874093770980835, - 0.020881203934550285, - -0.07886741310358047, - -0.08999329060316086, - 0.07935329526662827, - 0.05901937559247017, - 0.018977954983711243, - 0.0910627618432045, - -0.05023808404803276, - 0.0005669573438353837, - -0.04943303391337395, - 0.06656932085752487, - 0.00708664208650589, - -0.020673977211117744, - 0.03505445644259453, - -0.046781525015830994, - -0.009728481061756611, - 0.00017307557573076338, - 0.062065489590168, - -0.06824936717748642, - 0.05402689054608345, - 0.04232082888484001, - 0.017115388065576553, - 0.029098916798830032, - -0.04628005251288414, - -0.04206351563334465, - -0.02802305482327938, - -0.04234213009476662, - 0.02082713320851326, - 0.02718687243759632, - -0.0008981685969047248, - -0.002200718969106674, - 0.006638735998421907, - -0.0012337950756773353, - -0.0917057991027832, - -0.10048782825469971, - 0.011058949865400791, - -0.1791040599346161, - -0.025678221136331558, - 0.018635211512446404, - 0.10850796848535538, - 0.047767408192157745, - 0.07271455228328705, - -0.026165228337049484, - -0.025355264544487, - -0.08750183135271072, - -0.13322925567626953, - 0.05995915085077286, - 0.01659224182367325, - 0.04179232195019722, - 0.06866785138845444, - 0.003790231654420495, - 
0.0075165932066738605, - -0.12451988458633423, - 0.043020401149988174, - -0.008502614684402943, - 0.007917926646769047, - -0.017350370064377785, - 0.008838623762130737, - -0.06485218554735184, - -0.06691094487905502, - 0.03426596522331238, - 0.024008089676499367, - 0.05266363173723221, - 0.04014591500163078, - -0.04925774410367012, - 0.04040862247347832, - -0.03167680650949478, - 0.042694054543972015, - -0.038647063076496124, - -0.11679277569055557, - 0.04526420682668686, - -0.11612901836633682, - 0.011789222247898579, - 0.04879589006304741, - -0.09023935347795486, - 0.01673692837357521, - -0.02596200816333294, - 0.0006217096233740449, - -0.03742058202624321, - 0.06419850140810013, - -0.0657867044210434, - 0.003506968030706048, - -0.14764676988124847, - 0.04701691120862961, - 0.16012610495090485, - 0.05403438210487366, - -0.0016633996274322271, - 0.009381184354424477, - 0.007924608886241913, - 0.01786777749657631, - -0.05079204589128494, - -0.029873404651880264, - -0.12929809093475342, - -0.015970297157764435, - -0.008158445358276367, - -0.045339327305555344, - -0.016188427805900574, - -0.08447936922311783, - 0.04993673786520958, - 0.03646785020828247, - 0.011774525977671146, - 0.03200354799628258, - -0.023724231868982315, - -0.0006428144406527281, - -0.018292110413312912, - -0.01645626127719879, - -0.02223496325314045, - 0.11142494529485703, - -0.011918283998966217, - -0.06907562911510468, - 0.02104620449244976, - 8.309489669081093e-34, - 0.04458507150411606, - 0.012564167380332947, - 0.00724955415353179, - -0.006804605480283499, - -0.006219311151653528, - -0.010936964303255081, - -0.021314971148967743, - 0.011711995117366314, - -0.007926414720714092, - -0.021304219961166382, - 0.02477290853857994, - -0.03400973230600357, - 0.09787909686565399, - 0.022260181605815887, - -0.1016092300415039, - 0.013866880908608437, - -0.012803778983652592, - 0.0075775450095534325, - 0.06363657861948013, - -0.030455652624368668, - 0.030187418684363365, - 0.04316478222608566, - 
-0.01825382374227047, - -0.06823724508285522, - -0.05853891000151634, - -0.0786885917186737, - 0.0274016372859478, - -0.007098029833287001, - 0.06252356618642807, - 0.030469203367829323, - -0.01957341469824314, - 0.03377339616417885, - -0.011660650372505188, - 0.010068540461361408, - 0.038045819848775864, - 0.02914927341043949, - -0.027130283415317535, - 0.045694224536418915, - -0.019928015768527985, - 0.005420621018856764, - 0.05390109494328499, - 0.08124633878469467, - -0.012646442279219627, - -0.04256308823823929, - -0.06275372952222824, - 0.07635439932346344, - -0.03243841975927353, - 0.005659850314259529, - -0.011066016741096973, - -0.028473876416683197, - 0.0006818226538598537, - 0.0010741247097030282, - 0.07769523561000824, - 0.05876915901899338, - -0.013427264988422394, - 0.011100493371486664, - 0.04551602154970169, - -0.026398124173283577, - 0.025685815140604973, - -0.09366091340780258, - 0.05352221429347992, - -0.048836588859558105, - -0.008104451932013035, - -0.010864955373108387, - -0.013614547438919544, - 0.004153645131736994, - -0.07175569981336594, - -0.08854752033948898, - -0.03500461205840111, - -0.05254851654171944, - -0.027614863589406013, - 0.08625131845474243, - -0.03546272590756416, - -0.02887333370745182, - -0.10159868001937866, - -0.028719639405608177, - -0.03492984548211098, - -0.0170428566634655, - 0.07445263117551804, - -0.0438823327422142, - 0.053207747638225555, - 0.04853729531168938, - -0.12567447125911713, - -0.009663466364145279, - -0.04526982456445694, - -0.03588780015707016, - 0.01762925647199154, - -0.005348447244614363, - -0.01074414886534214, - 0.05340660363435745, - -0.029768286272883415, - 0.0049492972902953625, - 0.012169796973466873, - -0.015573430806398392, - -0.09747888147830963, - -3.1041271975324454e-33, - 0.0194513238966465, - 0.009628918021917343, - -0.027822740375995636, - 0.09368891268968582, - -0.0359320230782032, - 0.010453308001160622, - -0.02866782248020172, - 0.0221566092222929, - 0.06766410917043686, - 
0.04893945902585983, - -0.0015285926638171077, - -0.04352252930402756, - 0.10322573781013489, - -0.08220577239990234, - -0.042911555618047714, - 0.014294339343905449, - 0.014013602398335934, - -0.03485841676592827, - -0.013450855389237404, - -0.002059196587651968, - 0.017987089231610298, - -0.006127191241830587, - 0.07154537737369537, - 0.04335159808397293, - -0.014778303913772106, - 0.02115732803940773, - 0.0606507882475853, - -0.04877571761608124, - -0.05862700194120407, - -0.08890953660011292, - 0.09608694911003113, - 0.0009899098658934236, - -0.0330541767179966, - 0.01996663585305214, - 0.001770142582245171, - 0.016169700771570206, - -0.04747443646192551, - 0.022898582741618156, - -0.07072479277849197, - 0.0047416044399142265, - 0.03560169041156769, - 0.0012952463002875447, - -0.03315557539463043, - 0.008455471135675907, - -3.921203187928768e-06, - -0.020027833059430122, - 0.021393371745944023, - -0.10040563344955444, - -0.01480659656226635, - -0.03795131295919418, - -0.014075894840061665, - -0.06112401559948921, - 0.06571064889431, - 0.04188745096325874, - 0.023539962247014046, - -0.05987747758626938, - 0.00851847231388092, - 0.02684272639453411, - 0.04138780012726784, - -0.023006772622466087, - 0.002272712765261531, - -0.06703063100576401, - 0.008017421700060368, - 0.01765957660973072, - 0.0238480307161808, - 0.04377725347876549, - 0.0014354476006701589, - -0.030951501801609993, - 0.05077678710222244, - 0.041310399770736694, - 0.02345573715865612, - 0.044847581535577774, - -0.012993763200938702, - -0.07765231281518936, - 0.06419993191957474, - 0.07914774864912033, - -0.01010692585259676, - -0.04605787247419357, - -0.08842464536428452, - -0.003118990920484066, - 0.02931838668882847, - -0.06857184320688248, - 0.03788713738322258, - -0.03523596003651619, - -0.12513650953769684, - -0.05887526646256447, - -0.018760619685053825, - 0.028921905905008316, - -0.027741525322198868, - 0.10118293762207031, - 0.02542966790497303, - 0.020331334322690964, - 
0.023389235138893127, - -0.033613454550504684, - 0.011279042810201645, - -4.0931748657158096e-08, - 0.007863720878958702, - -0.13112495839595795, - -0.04701833799481392, - 0.04092184826731682, - -0.007581907790154219, - 0.07654201984405518, - 0.064987912774086, - 0.030623596161603928, - -0.08215741813182831, - 0.013813535682857037, - 0.02110140770673752, - 0.06278444081544876, - 0.00976646039634943, - 0.03104899637401104, - 0.016768503934144974, - 0.013764707371592522, - 0.04950803518295288, - -0.018416276201605797, - 0.016119973734021187, - 0.037164896726608276, - 0.06645975261926651, - 0.02553415112197399, - 0.02284054271876812, - 0.007527423556894064, - 0.007892807014286518, - 0.006764109246432781, - 0.018308067694306374, - -0.06779982149600983, - -0.08241979032754898, - 0.041591987013816833, - 0.018581248819828033, - 0.04872804880142212, - 0.004406395833939314, - -0.03522345796227455, - -0.001023659366182983, - -0.02906104177236557, - -0.002340866019949317, - 0.034798331558704376, - 0.06409124284982681, - 0.04808121174573898, - 0.03085481934249401, - 0.05560770630836487, - -0.06750525534152985, - 0.024545462802052498, - 0.09970931708812714, - 0.005820555612444878, - 0.05302456021308899, - 0.007615718524903059, - -0.07209459692239761, - -0.012196524068713188, - 0.04865802451968193, - 0.04767588898539543, - -0.04307388886809349, - -0.04726945981383324, - 0.004431530833244324, - 0.03461168333888054, - -0.04459869861602783, - 0.015400665812194347, - -0.07706720381975174, - 0.012848886661231518, - 0.03197343647480011, - 0.1119411364197731, - -0.10519342869520187, - -0.10109532624483109 - ] - }, - { - "type": "NarrativeText", - "element_id": "9ff1d8f7e93d526d0e3a174a51850ec8", - "text": "\"Can one be well while suffering morally? Can one be calm in times like these if one has any feeling?\" said Anna Pavlovna. 
\"You are staying the whole evening, I hope?\"", - "metadata": { - "data_source": { - "url": "example-docs/book-war-and-peace-1p.txt", - "permissions_data": [ - { - "mode": 33188 - } - ] - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] - }, - "embeddings": [ - -0.01610482670366764, - 0.05125338211655617, - -0.005827299784868956, - 0.09465290606021881, - -0.0277389008551836, - 0.04864745959639549, - 0.06249864399433136, - -0.054742179811000824, - 0.005165748298168182, - -0.09965425729751587, - -0.01438001450151205, - 0.04313591122627258, - -0.06380165368318558, - 0.016726698726415634, - 0.026246551424264908, - 0.08337194472551346, - -0.010945141315460205, - -0.014145425520837307, - 7.167678995756432e-05, - 0.13305307924747467, - -0.08622103929519653, - 0.01746830716729164, - 0.019615380093455315, - 0.03573538362979889, - -0.06584517657756805, - 0.023946993052959442, - 0.055600911378860474, - -0.005579685792326927, - 0.06655526906251907, - 0.01074755284935236, - -0.015564651228487492, - -0.07826479524374008, - 0.011902881786227226, - 0.04096625745296478, - 0.0073626236990094185, - 0.10021869838237762, - -0.07527383416891098, - 0.01305284071713686, - 0.041784755885601044, - 0.008722526021301746, - 0.04301176220178604, - -0.04666911065578461, - -0.0106803635135293, - -0.0453093945980072, - 0.029875554144382477, - -0.07483328878879547, - -0.04754325747489929, - -0.03723842650651932, - 0.01381721068173647, - -0.11729475855827332, - -0.07827991992235184, - 0.003367035649716854, - -0.12662997841835022, - -0.04176894202828407, - 0.03585528954863548, - 0.049924954771995544, - 0.005937882699072361, - 0.03759896382689476, - -0.006769762374460697, - 0.058931440114974976, - 0.0066518234089016914, - 0.057244524359703064, - 0.0627714991569519, - 0.04932573810219765, - -0.004728052299469709, - 0.04949145391583443, - 0.03973150625824928, - 0.010035614483058453, - -0.10073575377464294, - 0.022169973701238632, - -0.0748724564909935, - -0.0414479561150074, - 
0.02511800266802311, - -0.034044332802295685, - -0.05399055778980255, - -0.07167425006628036, - 0.07152590900659561, - -0.04223905876278877, - 0.019727656617760658, - 0.0659174844622612, - 0.03009355627000332, - -0.029379121959209442, - -0.030472930520772934, - -0.013006247580051422, - -0.07183650881052017, - -0.057955291122198105, - 0.06737776845693588, - -0.012468202039599419, - 0.08337195217609406, - -0.008741258643567562, - 0.03279910236597061, - 0.026616839691996574, - 0.029712636023759842, - 0.010602050460875034, - 0.06889159977436066, - 0.025316447019577026, - -0.1048581525683403, - 0.03166161850094795, - -0.048036471009254456, - 0.011014832183718681, - 0.030293121933937073, - 0.0508931465446949, - -0.042509641498327255, - -0.05075138434767723, - -0.016532093286514282, - 0.014843813143670559, - 0.018969547003507614, - -0.12581060826778412, - -0.017050109803676605, - 0.021225253120064735, - -0.11021722108125687, - -0.01098854374140501, - 0.03386017680168152, - -0.05214056000113487, - 0.031074296683073044, - 0.0539565272629261, - -0.018187059089541435, - -0.03948015347123146, - 0.03541719540953636, - -0.025734495371580124, - 0.05824299529194832, - -0.06790594011545181, - 0.018384568393230438, - 0.0920003280043602, - 0.009596326388418674, - 0.00292061734944582, - 0.0547582171857357, - 1.01328669149087e-33, - 0.06008734181523323, - -0.09051958471536636, - 0.05675503611564636, - 0.004556877072900534, - -0.021692266687750816, - 0.025838887318968773, - 0.004368612542748451, - -0.08353646844625473, - -0.01712646894156933, - -0.041716162115335464, - -0.01787552982568741, - -0.090555340051651, - -0.022911276668310165, - -0.030500125139951706, - -0.08570834994316101, - 0.023714762181043625, - -0.05599593371152878, - -0.03581826388835907, - -0.014550261199474335, - 0.04697931557893753, - 0.03575429692864418, - 0.0009537882287986577, - -0.04018554836511612, - 0.025454750284552574, - -0.058431074023246765, - -0.06453957408666611, - 0.10423276573419571, - 
0.013875417411327362, - -0.0054871514439582825, - 0.0010717586847022176, - -0.06231102719902992, - -0.006021364126354456, - -0.0301487036049366, - -0.04943401366472244, - 0.02952142059803009, - 0.10211921483278275, - -0.07292629033327103, - 0.13485057651996613, - -0.008966878056526184, - -0.007454921957105398, - -0.046335361897945404, - 0.055415865033864975, - 0.060770947486162186, - 0.047279711812734604, - -0.0384218692779541, - 0.012684510089457035, - -0.051487457007169724, - -0.053674276918172836, - -0.09879986196756363, - -0.014839579351246357, - -0.0997442901134491, - 0.028402229771018028, - 0.007323282305151224, - -0.040168605744838715, - -0.002353295451030135, - 0.02973511442542076, - 0.060454487800598145, - 0.06601224094629288, - 0.014262672513723373, - -0.07497284561395645, - -0.058932166546583176, - -0.09926281124353409, - -0.051700349897146225, - -0.08867475390434265, - -0.024467885494232178, - 0.01485553290694952, - 0.000490230624563992, - -0.04947168380022049, - -0.06529595702886581, - -0.0423608161509037, - -0.007980813272297382, - 0.05371476337313652, - -0.029511762782931328, - -0.05754321068525314, - -0.10310119390487671, - -0.002686432097107172, - 0.01842455007135868, - 0.0070448522455990314, - -0.027565181255340576, - -0.06457348167896271, - -0.01054664421826601, - -0.03435391187667847, - -0.01008880976587534, - 0.09299925714731216, - 0.10657938569784164, - -0.056471895426511765, - -0.05445379763841629, - -0.07525269687175751, - -0.07827096432447433, - 0.0779527947306633, - 0.03933518007397652, - 0.07512396574020386, - 0.1232500895857811, - -0.06564854085445404, - -0.07660932093858719, - -3.5192445786596897e-33, - 0.0510014183819294, - -0.001815489842556417, - -0.08879201114177704, - 0.14728926122188568, - 0.010032013058662415, - 0.05061608552932739, - -0.10411451756954193, - 0.011280626989901066, - -0.05086619779467583, - 0.058970943093299866, - 0.003801255254074931, - -0.025784963741898537, - 0.06590864062309265, - 0.03221660107374191, - 
-0.012508578598499298, - -0.022222409024834633, - 0.009842983447015285, - 0.02998879738152027, - -0.0033919354900717735, - 0.009476672857999802, - -0.041535697877407074, - 0.05869355425238609, - -0.008409304544329643, - -0.008358325809240341, - 0.04683196544647217, - 0.06554035097360611, - 0.08408452570438385, - 0.012960228137671947, - -0.1073995903134346, - -0.047734543681144714, - 0.05281081795692444, - -0.02889961563050747, - -0.10605499148368835, - 0.010272017680108547, - 0.03618711233139038, - 0.05617084726691246, - 0.060843393206596375, - -0.09061261266469955, - -0.054288916289806366, - -0.012963452376425266, - 0.016691073775291443, - -0.016843270510435104, - -0.03512909635901451, - 0.03692169487476349, - -0.013455664739012718, - 0.015560205094516277, - 0.044837720692157745, - -0.03817975893616676, - -0.023381013423204422, - 0.03243393450975418, - -0.03296266868710518, - -0.028760399669408798, - -0.0438658706843853, - 0.03029033914208412, - 0.06805291026830673, - -0.06023215875029564, - -0.020878812298178673, - -0.0380665585398674, - 0.04697101190686226, - -0.05305470898747444, - -0.017459893599152565, - -0.0225212424993515, - -0.0790385827422142, - 0.023562666028738022, - -0.022720472887158394, - -0.02160538174211979, - 0.004828989505767822, - -0.03439774736762047, - -0.034284114837646484, - 0.012214817106723785, - -0.02009177766740322, - 0.035359326750040054, - -0.09349168092012405, - -0.014218195341527462, - -0.018606074154376984, - 0.04589645192027092, - -0.0029396654572337866, - -0.0654912143945694, - 0.0044088177382946014, - -0.05500967428088188, - -0.00021258574270177633, - -0.028352705761790276, - 0.05742249637842178, - -0.029054759070277214, - -0.0667227953672409, - -0.04144906997680664, - 0.03674278035759926, - 0.03910469636321068, - -0.02284976653754711, - 0.0744740292429924, - -0.010481500066816807, - 0.017465591430664062, - -0.007322142831981182, - -0.0860389843583107, - -0.02918790653347969, - -4.020765231871337e-08, - 0.041963621973991394, - 
-0.05520770326256752, - -0.08829087764024734, - -0.04435349628329277, - -0.022022850811481476, - 0.001098370528779924, - 0.03698139637708664, - 0.013662584125995636, - -0.0710933580994606, - 0.11878079921007156, - 0.043454259634017944, - -0.020296722650527954, - 0.04800381138920784, - 0.05399679020047188, - 0.011598949320614338, - 0.02649381384253502, - 0.06979779899120331, - -0.005435304716229439, - 0.0070427763275802135, - 0.031269561499357224, - 0.06965865939855576, - 0.00803336314857006, - 0.03416164591908455, - 0.005839186254888773, - 0.013039788231253624, - 0.05242527276277542, - 0.03213014453649521, - 0.012305348180234432, - -0.08455014228820801, - -0.004462853539735079, - 0.06343657523393631, - 0.07933670282363892, - -0.08516852557659149, - -0.017162835225462914, - -0.004258512984961271, - -0.06407918781042099, - 0.07798885554075241, - -0.025454271584749222, - 0.016501426696777344, - 0.026065517216920853, - 0.010165649466216564, - 0.06678290665149689, - -0.02757350355386734, - 0.011442668735980988, - 0.04229017719626427, - -0.019121825695037842, - 0.04182105511426926, - -0.04179900139570236, - 0.010216103866696358, - 0.04622019827365875, - -0.010210856795310974, - 0.006616200320422649, - 0.04898656904697418, - 0.0017409288557246327, - 0.0059212371706962585, - -0.018594294786453247, - 0.05280454829335213, - 0.0533737950026989, - -0.030217604711651802, - -0.034488774836063385, - 0.056356512010097504, - 0.038436610251665115, - -0.02287379652261734, - -0.1043722853064537 - ] - }, - { - "type": "NarrativeText", - "element_id": "92ccca74969ed207c2b36cecb1b1b677", - "text": "\"And the fete at the English ambassador's? Today is Wednesday. I must put in an appearance there,\" said the prince. 
\"My daughter is coming for me to take me there.\"", - "metadata": { - "data_source": { - "url": "example-docs/book-war-and-peace-1p.txt", - "permissions_data": [ - { - "mode": 33188 - } - ] - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] - }, - "embeddings": [ - 0.011866243556141853, - 0.08889525383710861, - 0.09668373316526413, - 0.023566389456391335, - 0.08067434281110764, - 0.024664608761668205, - 0.16279205679893494, - -0.05037917196750641, - -0.05566133186221123, - -0.05389242619276047, - -0.078611820936203, - -0.06395158916711807, - -0.04685981199145317, - -0.04426385462284088, - 0.05116282403469086, - 0.0160899106413126, - -0.00420393468812108, - -0.00604472728446126, - -0.02625413052737713, - 0.024689819663763046, - 0.033023636788129807, - -0.06363381445407867, - 0.021082047373056412, - 0.037574246525764465, - -0.041602276265621185, - 0.010709786787629128, - 0.0193449966609478, - -0.021392816677689552, - -0.027126139029860497, - -0.012032430619001389, - -0.07863089442253113, - -0.07243058830499649, - -0.04903903976082802, - 0.06997495144605637, - -0.012854443863034248, - 0.08667638152837753, - 0.031022487208247185, - -0.06254471838474274, - 0.039497584104537964, - 0.044366080313920975, - 0.00010912134894169867, - -0.059773776680231094, - 0.014428182505071163, - 0.024807944893836975, - -0.0034132814034819603, - -0.017541909590363503, - 0.08461368083953857, - 0.005840898025780916, - 0.008691012859344482, - 0.024143574759364128, - -0.04246651008725166, - -0.004703774116933346, - -0.01601823978126049, - -0.10551299899816513, - -0.020300792530179024, - 0.079808808863163, - 0.07783838361501694, - -0.06705289334058762, - 0.0600367933511734, - -0.006285223178565502, - -0.052225712686777115, - 0.009230416268110275, - -0.003882045391947031, - 0.11813551932573318, - -0.012421286664903164, - -0.04964107647538185, - 0.019924264401197433, - -0.006942416075617075, - -0.047498952597379684, - -0.0092844907194376, - 0.002182289492338896, - 
-0.009434838779270649, - -0.016101693734526634, - -0.03863585367798805, - -0.055425092577934265, - -0.013211050070822239, - 0.04711172357201576, - 0.015680355951189995, - 0.009487486444413662, - 0.027162928134202957, - -0.026584068313241005, - -0.09472533315420151, - -0.028613077476620674, - -0.016398660838603973, - 0.025397446006536484, - -0.05883358046412468, - -0.031065234914422035, - -0.0722942128777504, - -0.011319995857775211, - 0.028573624789714813, - 0.015606842003762722, - -0.05373374745249748, - -0.0284283384680748, - 0.12794795632362366, - 0.00036242985515855253, - -0.021257309243083, - 0.012174960225820541, - 0.052161529660224915, - -0.01188338827341795, - 0.07691037654876709, - 0.023801149800419807, - 0.16610784828662872, - 0.028950247913599014, - 0.10628700256347656, - -0.08720766752958298, - -0.07669955492019653, - -0.08043919503688812, - -0.00636324705556035, - -0.048864737153053284, - -0.029196567833423615, - -0.021198799833655357, - -0.008950486779212952, - 0.05359950661659241, - -0.002875861246138811, - 0.010996905155479908, - -0.01853766292333603, - 0.09927273541688919, - -0.05339926481246948, - -0.04690808057785034, - -0.04743364453315735, - 0.08799826353788376, - 0.07006954401731491, - -0.03933412954211235, - -0.02058088220655918, - -0.12285801768302917, - -0.014865328557789326, - 0.017745865508913994, - -5.0484382001466955e-33, - -0.030972838401794434, - 0.09441109001636505, - 0.06604082137346268, - 0.037709347903728485, - -0.006329773925244808, - 0.03512585535645485, - -0.03221408277750015, - 0.016415290534496307, - 0.022937318310141563, - -0.04356161132454872, - -0.03330241143703461, - -0.1366702914237976, - -0.033474214375019073, - -0.14437949657440186, - -0.18521352112293243, - 0.06414131075143814, - 0.027527576312422752, - 0.007137635722756386, - 0.049748752266168594, - -0.009062962606549263, - 0.02308008074760437, - 0.08295858651399612, - -0.012124757282435894, - -0.0045729815028607845, - -0.025415800511837006, - -0.010783489793539047, 
- 0.0871695801615715, - -0.04952632263302803, - 0.006761748343706131, - 0.020057082176208496, - -0.004139161668717861, - 0.004545574076473713, - 0.0686500295996666, - -0.06335048377513885, - 0.0464765802025795, - -0.05477472022175789, - -0.009629021398723125, - -0.08248250186443329, - -0.014563480392098427, - -0.0018979765009135008, - -0.024014713242650032, - -0.04390757158398628, - 0.048891082406044006, - 0.0008528573089279234, - -0.019986242055892944, - 0.06498352438211441, - 7.351673048106022e-06, - 0.09102393686771393, - 0.10025796294212341, - 0.03749712556600571, - -0.011586220003664494, - -0.0652185007929802, - -0.04918010160326958, - -0.02891961857676506, - 0.01457853615283966, - -0.01993662491440773, - -0.011860687285661697, - 0.03682943060994148, - 0.0452398918569088, - -0.08813270926475525, - 0.06941300630569458, - -0.05927511677145958, - 0.06256061047315598, - 0.05739177018404007, - -0.023394592106342316, - -0.054727423936128616, - -0.04232924059033394, - -0.0034762327559292316, - 0.028198756277561188, - 0.054639123380184174, - -0.04186933487653732, - 0.12883156538009644, - 0.01251488458365202, - 0.0248282290995121, - -0.025926433503627777, - 0.029718393459916115, - -0.008693775162100792, - -0.08170245587825775, - 0.04893607646226883, - 0.05579773709177971, - -0.04082988575100899, - 0.062129758298397064, - 0.03175906836986542, - 0.026697387918829918, - 0.09816811233758926, - -0.06526180356740952, - 0.0019765058532357216, - -0.09599479287862778, - 0.054348159581422806, - 0.09502343088388443, - -0.02476978674530983, - 0.0012055139522999525, - -0.045463256537914276, - -0.06660450249910355, - -0.01966599002480507, - 2.3062532437431174e-33, - 0.052374545484781265, - -0.05992850288748741, - 0.037788182497024536, - 0.08114951848983765, - 6.414770177798346e-05, - -0.05006162077188492, - 0.008029215037822723, - 0.06410732120275497, - 0.040976833552122116, - 0.04028773307800293, - -0.030725769698619843, - -0.03717454895377159, - 0.10085516422986984, - 
-0.09978596121072769, - 0.0014208003412932158, - -0.051919739693403244, - 0.12550193071365356, - -0.06505941599607468, - 0.012739790603518486, - 0.08034614473581314, - -0.02975529059767723, - -0.04943246766924858, - 0.02030806988477707, - -0.013933042995631695, - -0.005743860732764006, - -0.008314114063978195, - 0.029602225869894028, - -0.059502582997083664, - -0.07099646329879761, - -0.04468968138098717, - 0.021866321563720703, - -0.0315832644701004, - -0.04854825511574745, - 0.0677240639925003, - -0.037430282682180405, - 0.06331313401460648, - 0.0068172854371368885, - -0.009526307694613934, - 0.06733030080795288, - 0.06829550117254257, - -0.022175446152687073, - -0.09820756316184998, - -0.004189577419310808, - 0.06697065383195877, - 0.003556044539436698, - -0.05099102482199669, - -0.055892933160066605, - 0.04070524871349335, - 0.07824969291687012, - 0.047288864850997925, - -0.06245827302336693, - 0.03506584092974663, - -0.07826394587755203, - -0.0011259635211899877, - 0.01275861170142889, - 0.01283238735049963, - -0.0638238862156868, - -0.041369177401065826, - 0.05821382999420166, - 0.0121921356767416, - -0.033490877598524094, - 0.004410061985254288, - -0.04708581045269966, - -0.07522597908973694, - -0.03232262656092644, - 0.007748166564851999, - -0.04920271411538124, - 0.0786455050110817, - 0.044529229402542114, - 0.06888322532176971, - 0.04066084697842598, - -0.04167947545647621, - -0.0155326584354043, - 0.006853732746094465, - -0.01296643540263176, - -0.019156215712428093, - 0.10059625655412674, - -0.03500260412693024, - 0.009413093328475952, - -0.06461603194475174, - -0.006110424175858498, - 0.021075008437037468, - -0.025133145973086357, - -0.06443876028060913, - 0.07268252223730087, - -0.07528214156627655, - -0.010372307151556015, - 0.049014750868082047, - -0.008247259072959423, - -0.04735469073057175, - 0.0038357325829565525, - -0.02234756574034691, - 0.0052635446190834045, - -0.1019153967499733, - 0.011908848769962788, - -3.435564011056158e-08, - 
0.01710052415728569, - -0.011031636968255043, - 0.010672705247998238, - -0.0347326323390007, - -0.045531097799539566, - -0.05369754508137703, - -0.043721843510866165, - -0.06134836748242378, - -0.010886133648455143, - 0.0071554225869476795, - 0.04034580662846565, - 0.02104276232421398, - -0.020796742290258408, - -0.06441263109445572, - 0.03161946311593056, - 0.01229005679488182, - -0.03710019588470459, - -0.04589265584945679, - -0.01625160686671734, - -0.03238838165998459, - -0.03301577270030975, - 0.029694775119423866, - 0.040752191096544266, - 0.019342439249157906, - -0.03307646885514259, - 0.05074732005596161, - 0.022381633520126343, - 0.0169807281345129, - 0.01737947203218937, - -0.0007204238791018724, - 0.0007866729865781963, - 0.02885904721915722, - -0.022456925362348557, - -0.03294430673122406, - -0.017716819420456886, - 0.02514677308499813, - 0.006066956557333469, - -0.025896335020661354, - 0.06328636407852173, - -0.06675472110509872, - 0.0495077483355999, - -0.029641110450029373, - -0.038542091846466064, - 0.05578588321805, - 0.013056338764727116, - 0.016117483377456665, - -0.035832375288009644, - 0.03434690833091736, - 0.010894053615629673, - -0.015218131244182587, - -0.05202468857169151, - -0.037301283329725266, - 0.076298788189888, - 0.027707567438483238, - -0.010397735051810741, - 0.06813449412584305, - 0.03388851881027222, - -0.03890654817223549, - 0.034284405410289764, - 0.07747285813093185, - 0.01816493086516857, - 0.022815076634287834, - -0.026967773213982582, - -0.06140937656164169 - ] - }, - { - "type": "NarrativeText", - "element_id": "fc189a009c8fc4507bd42d9601bdb566", - "text": "\"I thought today's fete had been canceled. 
I confess all these festivities and fireworks are becoming wearisome.\"", - "metadata": { - "data_source": { - "url": "example-docs/book-war-and-peace-1p.txt", - "permissions_data": [ - { - "mode": 33188 - } - ] - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] - }, - "embeddings": [ - -0.010653946548700333, - 0.11256983131170273, - 0.12362264096736908, - 0.033492617309093475, - 0.10327771306037903, - 0.04370428994297981, - 0.1903470903635025, - -0.04881716147065163, - -0.04538901150226593, - -0.06825294345617294, - -0.06457121670246124, - 0.03152632713317871, - -0.07903747260570526, - -0.061386387795209885, - 0.056862909346818924, - -0.04516022279858589, - -0.05691821873188019, - -0.04495846480131149, - 0.0268926490098238, - 0.04386984184384346, - 0.05656356364488602, - 0.007857796736061573, - 0.051330313086509705, - 0.08910007774829865, - -0.010225413367152214, - 0.04440242424607277, - -0.06170351058244705, - 0.01380672212690115, - -0.024510039016604424, - 0.033094946295022964, - -0.05924659222364426, - 0.02289014682173729, - -0.01162855513393879, - 0.014852096326649189, - 0.008510804735124111, - -0.04698104411363602, - -0.015184656716883183, - -0.038893185555934906, - 0.020497048273682594, - 0.07892897725105286, - -0.00766504043713212, - -0.08813467621803284, - -0.10797024518251419, - -0.020603056997060776, - -0.05402370169758797, - -0.052811089903116226, - 0.1027015671133995, - -0.060565486550331116, - 0.04975073039531708, - 0.06786340475082397, - 0.02613566257059574, - -0.056518930941820145, - -0.04140464588999748, - -0.12265300005674362, - 0.08926378190517426, - -0.03595893457531929, - 0.10509677976369858, - -0.014660114422440529, - 0.04789544641971588, - -0.039322853088378906, - -0.039191365242004395, - -0.02193138189613819, - -0.04562743753194809, - 0.10790623724460602, - 0.027846314013004303, - 0.02832428552210331, - -0.00028395093977451324, - 0.03493306413292885, - -0.10772647708654404, - 0.028075600042939186, - 0.02785682864487171, - 
-0.011980119161307812, - 0.026298213750123978, - -0.020852308720350266, - -0.10088098794221878, - 0.04576757922768593, - 0.049791477620601654, - 0.002480587689206004, - 0.03908182308077812, - 0.05512985214591026, - -0.018136367201805115, - -0.14219200611114502, - -0.0005619940930046141, - -0.05667608231306076, - 0.05778316408395767, - 0.03216353431344032, - -0.0436590351164341, - -0.0037081732880324125, - -0.04961114376783371, - 0.017681317403912544, - -0.11600944399833679, - 0.023688914254307747, - 0.009782613255083561, - 0.09010549634695053, - 0.002083333907648921, - 0.04555704444646835, - -0.07638200372457504, - 0.03248212859034538, - 0.008907423354685307, - 0.06373895704746246, - 0.04277601093053818, - 0.0983373299241066, - -0.021791910752654076, - -0.02737756073474884, - -0.009172668680548668, - -0.04652843996882439, - -0.04313445836305618, - -0.003167480230331421, - -0.0335027314722538, - -0.007962736301124096, - 0.003202703781425953, - -0.05630065128207207, - 0.0620303638279438, - -0.05055926367640495, - -8.381352381547913e-05, - 0.06155041232705116, - 0.06112568825483322, - 0.011851480230689049, - -0.03068787045776844, - 0.0075075323693454266, - 0.09114453196525574, - 0.11593952775001526, - 0.05264327675104141, - 0.065922312438488, - -0.07420970499515533, - -0.03912065923213959, - 0.027092091739177704, - -1.8327369359250372e-33, - 0.054607946425676346, - 0.04037277773022652, - 0.03901923820376396, - 0.01241046842187643, - 0.09535899013280869, - -0.007410767953842878, - -0.07205124944448471, - -0.01602909341454506, - 0.008095518685877323, - 0.034797605127096176, - 0.010254917666316032, - -0.021082831546664238, - -0.05064810439944267, - -0.15747489035129547, - -0.018878739327192307, - -0.06597290188074112, - 0.04137653484940529, - 0.009779572486877441, - 0.07300712168216705, - -0.028761882334947586, - 0.0325266495347023, - 0.03329962119460106, - -0.014451118186116219, - -0.027215249836444855, - -0.005444579757750034, - -0.01534272450953722, - 
0.019515756517648697, - 0.022202344611287117, - -0.03210347145795822, - 0.02260056510567665, - 0.0027476155664771795, - -0.0246365237981081, - 0.13610731065273285, - 0.02838871441781521, - 0.04335246607661247, - -0.018115056678652763, - -0.024809375405311584, - -0.008998896926641464, - -0.03750869259238243, - -0.04691854864358902, - -0.005454168654978275, - -0.05334152653813362, - -0.03215663507580757, - -0.0518205463886261, - -0.01692483201622963, - 7.557541539426893e-05, - 0.057969242334365845, - 0.11221028119325638, - 0.02549988031387329, - -0.004597585182636976, - 0.04273567721247673, - 0.007748213596642017, - -0.014094243757426739, - -0.05137662589550018, - 0.0468406043946743, - -0.017855513840913773, - -0.029319604858756065, - -0.06956545263528824, - 0.09505332261323929, - -0.053821172565221786, - 0.07914089411497116, - -0.040523894131183624, - 0.04962174594402313, - -0.018522197380661964, - -0.05399669334292412, - -0.007240683771669865, - 0.007706627715379, - -0.02627011574804783, - -0.0059911892749369144, - 0.03503413870930672, - -0.03321618214249611, - 0.013613582588732243, - -0.0564926452934742, - 0.0022380470763891935, - -0.02955957129597664, - 0.00616774195805192, - -0.05619193613529205, - -0.053244031965732574, - 0.03570204973220825, - 0.022269565612077713, - 0.04897572472691536, - -0.0896250307559967, - -0.041765246540308, - 0.023561052978038788, - 0.024421123787760735, - -0.052866917103528976, - 0.03288322314620018, - -0.04734733700752258, - -0.07115748524665833, - 0.06786515563726425, - -0.018333327025175095, - 0.0020864116959273815, - 0.0027383901178836823, - 0.01061271969228983, - -0.04479089751839638, - -2.018944149138415e-34, - 0.01946333982050419, - 0.044086430221796036, - -0.0516173355281353, - 0.03904927149415016, - -0.04494860768318176, - -0.05011332780122757, - -0.08227333426475525, - -0.002039305865764618, - -0.04571044072508812, - -0.04292415827512741, - 0.059073615819215775, - 0.001226848573423922, - 0.05057672783732414, - 
-0.017013760283589363, - -0.07523749768733978, - -0.009098879992961884, - 0.1115870475769043, - -0.043558597564697266, - 0.05305679515004158, - 0.010514017194509506, - -0.04913157969713211, - 0.0022036791779100895, - -0.021879250183701515, - -0.030561167746782303, - -0.03164591267704964, - 0.02113303914666176, - -0.036210644990205765, - -0.016786744818091393, - -0.027991337701678276, - -0.017979362979531288, - -0.02643681690096855, - -0.055546585470438004, - -0.060604047030210495, - 0.03205907717347145, - 0.054379940032958984, - 0.051334574818611145, - 0.021601825952529907, - -0.07974281162023544, - 0.01656925305724144, - -0.1028970256447792, - 0.013448002748191357, - -0.02339242212474346, - -0.04313669726252556, - 0.06333959102630615, - -0.011347867548465729, - -0.0013688692124560475, - -0.08862260729074478, - 0.0016136885387822986, - 0.05680185928940773, - 0.08564349263906479, - -0.10816530138254166, - -0.004325877409428358, - -0.10851231217384338, - -0.003603078192099929, - -0.01763860322535038, - 0.04296039417386055, - -0.058378979563713074, - -0.0074080596677958965, - -0.07487190514802933, - 0.029905244708061218, - -0.008162829093635082, - 0.022191207855939865, - 0.02700744941830635, - 0.0072532519698143005, - 0.05926317721605301, - -0.0029201938305050135, - -0.005193192977458239, - 0.07360705733299255, - 0.013908852823078632, - 0.09035390615463257, - 0.08343967795372009, - 0.06759880483150482, - -0.13020360469818115, - -0.019805122166872025, - -0.07988030463457108, - -0.012916913256049156, - 0.04695718362927437, - 0.004193380009382963, - -0.021292787045240402, - 0.023584898561239243, - 0.08745799958705902, - 0.017905578017234802, - -0.03438118100166321, - 0.01657097041606903, - 0.0035891584120690823, - 0.025009438395500183, - -0.033202413469552994, - 0.047262586653232574, - -0.03555088862776756, - 0.057335298508405685, - 0.04835951328277588, - -0.0392119437456131, - 0.030850498005747795, - -0.010515674017369747, - -0.013820183463394642, - 
-2.6571369104999576e-08, - 0.014992224983870983, - 0.008288579992949963, - -0.08745522052049637, - -0.08537154644727707, - 0.035291336476802826, - -0.0353158563375473, - -0.033936649560928345, - -0.021148663014173508, - -0.07676290720701218, - -0.01782182604074478, - 0.0823449194431305, - 0.028616556897759438, - 0.0685589611530304, - 0.026175348088145256, - 0.034271806478500366, - 0.04005250334739685, - 0.00907465536147356, - -0.04730065166950226, - -0.001741867046803236, - 0.01489108707755804, - 0.03218754380941391, - -0.010528621263802052, - -0.03233154118061066, - -0.0033602635376155376, - 0.014156601391732693, - 0.0319225899875164, - -0.027071978896856308, - -0.0595325231552124, - 0.0352763906121254, - 0.07609441131353378, - 0.05349636822938919, - -0.01689092628657818, - -0.06419415771961212, - -0.028087299317121506, - -0.06703958660364151, - -0.015829723328351974, - 0.06796962767839432, - -0.03194466978311539, - 0.06302083283662796, - -0.03555421903729439, - 0.041748542338609695, - 0.007716824300587177, - 0.03737203776836395, - 0.056130923330783844, - -0.02320992201566696, - -0.029660740867257118, - -0.027147408574819565, - 0.019531400874257088, - -0.06499860435724258, - -0.037302419543266296, - 0.03669002652168274, - 0.01603877544403076, - 0.016065610572695732, - 0.0232948400080204, - 0.02865445613861084, - 0.04150641709566116, - 0.0005327454418875277, - 0.0648808553814888, - 0.015284345485270023, - -0.06204137206077576, - 0.04360411316156387, - -0.019565792754292488, - -0.05147647485136986, - -0.1086549162864685 - ] - }, - { - "type": "NarrativeText", - "element_id": "73b0874758fb74535ea6817963e50dc5", - "text": "\"If they had known that you wished it, the entertainment would have been put off,\" said the prince, who, like a wound-up clock, by force of habit said things he did not even wish to be believed.", - "metadata": { - "data_source": { - "url": "example-docs/book-war-and-peace-1p.txt", - "permissions_data": [ - { - "mode": 33188 - } - ] - }, - 
"filetype": "text/plain", - "languages": [ - "eng" - ] - }, - "embeddings": [ - -0.020206240937113762, - 0.12951597571372986, - 0.08377553522586823, - 0.042781613767147064, - 0.031008265912532806, - 0.06985463947057724, - 0.11802545934915543, - -0.006943183019757271, - 0.01194966584444046, - -0.01587282307446003, - -0.051879920065402985, - 0.05673328414559364, - 0.0200809258967638, - -0.07735715806484222, - 0.0199168398976326, - -0.03506135568022728, - 0.003675963496789336, - 0.06225065514445305, - -0.022381942719221115, - 0.11083511263132095, - 0.047053784132003784, - -0.03191846236586571, - 0.015645677223801613, - 0.0159194003790617, - -0.0589054673910141, - 0.03530493006110191, - 0.025768859311938286, - 0.0034588512498885393, - -0.03781077265739441, - 0.043060220777988434, - -0.004105839412659407, - -0.044189441949129105, - 0.024985792115330696, - 0.02370472066104412, - -0.03160209208726883, - -0.022401465103030205, - -0.02153399959206581, - 0.038860734552145004, - -0.0018124697962775826, - 0.02093333564698696, - 0.03944184631109238, - 0.03592611476778984, - 0.034195687621831894, - 0.04793383181095123, - 0.01748066209256649, - 0.00027098090504296124, - 0.02019420638680458, - -0.04376139119267464, - -0.030024191364645958, - 0.0271692406386137, - -0.049689680337905884, - 0.04730021208524704, - 0.024381227791309357, - -0.15539486706256866, - 0.008370224386453629, - 0.056124478578567505, - 0.04456627741456032, - 0.03630043566226959, - 0.0731128454208374, - 0.0710405707359314, - -0.04017263650894165, - -0.06114162877202034, - 0.07197646051645279, - 0.01850246638059616, - 0.013932736590504646, - 0.05121603608131409, - 0.023403139784932137, - 0.04028164967894554, - -0.05729912221431732, - 0.05013591796159744, - -0.030536945909261703, - 0.0007221999694593251, - 0.015468939207494259, - -0.11082416027784348, - -0.03366214781999588, - -0.022141622379422188, - 0.011651264503598213, - -0.08020786195993423, - -0.004598688334226608, - 0.019665343686938286, - 
-0.059021882712841034, - -0.04507965222001076, - -0.10408805310726166, - -0.00651812506839633, - -0.0840500071644783, - -0.0919281542301178, - 0.09309028089046478, - -0.011402169242501259, - -0.005010826513171196, - -0.024076180532574654, - -0.0405208058655262, - -0.02130955643951893, - -0.08896397054195404, - 0.028615817427635193, - -0.00292077730409801, - 0.08083636313676834, - -0.019954122602939606, - -0.03179207444190979, - -0.0848674401640892, - 0.017579471692442894, - 0.05745568126440048, - 0.04248340427875519, - -0.04345238208770752, - 0.042891304939985275, - -0.045201703906059265, - -0.014572757296264172, - -0.09100909531116486, - -0.017879344522953033, - -0.07227155566215515, - 0.007943677715957165, - -0.049404483288526535, - -0.024981409311294556, - 0.03735656663775444, - 0.011509985662996769, - 0.07993854582309723, - 0.022334927693009377, - 0.009303181432187557, - 0.05824175104498863, - -0.011989140883088112, - -0.023353230208158493, - -0.004346911329776049, - 0.07217641174793243, - 0.02550886571407318, - 0.06027130037546158, - -0.012160559184849262, - -0.06261356920003891, - 0.041800402104854584, - -3.163587375216883e-33, - 0.031563758850097656, - -0.05821530148386955, - 0.05685441195964813, - -0.053995005786418915, - 0.0942942351102829, - 0.055898070335388184, - -0.04127060994505882, - -0.024190839380025864, - 0.0967966765165329, - -0.007501823361963034, - 0.07606164366006851, - -0.06512581557035446, - -0.07639320194721222, - -0.09658738225698471, - -0.0014412739546969533, - 0.0834561437368393, - -0.013049979694187641, - -0.030703747645020485, - 0.11568799614906311, - -0.06129986792802811, - 0.04513254761695862, - 0.0767102837562561, - -0.003363568102940917, - -0.06794804334640503, - -0.06543359160423279, - 0.02625950239598751, - 0.033392541110515594, - -0.012970225885510445, - 0.03452327474951744, - 0.03072984889149666, - 0.008083149790763855, - -0.006239075213670731, - 0.07763144373893738, - -0.02493339218199253, - 0.03211630508303642, - 
-0.03454034775495529, - -0.043366506695747375, - -1.1975941561104264e-05, - -0.01228292379528284, - -0.043582815676927567, - -0.0022351678926497698, - -0.002388779306784272, - -0.010763967409729958, - -0.0490192174911499, - -0.10290514677762985, - -0.010432828217744827, - 0.0023269152734428644, - 0.012159875594079494, - 0.009796716272830963, - -0.04641438648104668, - 0.0773298516869545, - -0.019159629940986633, - 0.04067340865731239, - -0.12515036761760712, - -0.016597537323832512, - -0.037446968257427216, - -0.02256210334599018, - 0.027769237756729126, - 0.0941220223903656, - -0.035229869186878204, - 0.052800536155700684, - -0.0853150337934494, - 0.05726965516805649, - 0.023679574951529503, - -0.011398550122976303, - 0.03862198069691658, - -0.029250845313072205, - 0.006520797964185476, - -0.06217475235462189, - 0.0010437773307785392, - -0.033238913863897324, - 0.04564686864614487, - -0.08631336688995361, - -0.040670864284038544, - -0.09217272698879242, - 0.012810568325221539, - -0.09887009114027023, - -0.012768099084496498, - -0.030144939199090004, - 0.023225272074341774, - 0.026605406776070595, - -0.04663009196519852, - -0.04845720902085304, - -0.03969937190413475, - 0.06972546875476837, - -0.04372088983654976, - 0.011204209178686142, - -0.08882999420166016, - -0.010241562500596046, - 0.08738932013511658, - -0.00033993468969129026, - 0.0351625457406044, - 0.015185914002358913, - 0.03600313887000084, - -0.07327541708946228, - 1.0332334760507559e-34, - -0.03117039054632187, - -0.04826400429010391, - 0.03474143147468567, - 0.07790862768888474, - 0.0375639870762825, - -0.015025552362203598, - -0.11607638746500015, - -0.03865442052483559, - -0.013813172467052937, - -0.006157952826470137, - -0.0013500262284651399, - -0.05648373067378998, - 0.011533471755683422, - -0.026408109813928604, - -0.04795568063855171, - -0.046589840203523636, - 0.09586967527866364, - 0.030003156512975693, - -0.062221821397542953, - 0.03532593697309494, - 0.006078818812966347, - 
-0.06474349647760391, - -0.012039566412568092, - -0.04550515487790108, - 0.06804171949625015, - 0.005955437198281288, - 0.0031284198630601168, - -0.012635494582355022, - -0.09527385234832764, - -0.0028574736788868904, - 0.02778475359082222, - 0.08043226599693298, - -0.04234840348362923, - 0.06639651954174042, - 0.03488646075129509, - -0.0062248073518276215, - -0.03035205602645874, - 0.014665752649307251, - -0.02213640697300434, - -0.015464326366782188, - -0.019275479018688202, - -0.07928955554962158, - -0.0820232555270195, - -0.03999247029423714, - 0.008896835148334503, - 0.02598561719059944, - -0.013959486968815327, - 0.061563797295093536, - 0.110643669962883, - 0.04227675125002861, - -0.04325950890779495, - 0.005848786327987909, - -0.015286489389836788, - -0.008673778735101223, - 0.0036576888523995876, - -0.022588001564145088, - -0.050529077649116516, - 0.008483432233333588, - 0.10826185345649719, - 0.023219671100378036, - -0.02730083465576172, - -0.041363880038261414, - -0.025106480345129967, - -0.07338704913854599, - 0.020329756662249565, - 0.08812788873910904, - 0.0364069789648056, - 0.06843487173318863, - 0.08328745514154434, - -0.03653816878795624, - 0.1285361498594284, - 0.02491997554898262, - -0.024181773886084557, - 0.05908014997839928, - -0.021658388897776604, - 0.07286790013313293, - -0.049558840692043304, - -0.05740758031606674, - -0.04485277086496353, - 0.003638401161879301, - 0.028418412432074547, - -0.027394302189350128, - -0.010077430866658688, - -0.028411822393536568, - -0.017703479155898094, - -0.05260618031024933, - -0.02642355114221573, - 0.05938556417822838, - -0.05396810173988342, - 0.042548779398202896, - 0.06250688433647156, - -0.021676814183592796, - 0.015603083185851574, - -0.0015137487789615989, - 0.015357363037765026, - -3.822838934297579e-08, - -0.04906412586569786, - -0.05203795060515404, - 0.006525204051285982, - -0.014914880506694317, - 0.06046931818127632, - 0.036261361092329025, - 0.052406132221221924, - 0.015142392367124557, - 
0.0015692857559770346, - -0.013902605511248112, - 0.10088539868593216, - -0.05303017050027847, - 0.0070328488945961, - 0.04286135360598564, - 0.03630495443940163, - 0.027795182541012764, - -0.0141828628256917, - -0.13576531410217285, - 0.014229429885745049, - 0.0261383056640625, - 0.05622301623225212, - -0.017076734453439713, - 0.057860106229782104, - -0.08714920282363892, - -0.07669667154550552, - 0.006748647429049015, - -0.03315611183643341, - -0.024255719035863876, - -0.0055033075623214245, - 0.07801548391580582, - 0.06804453581571579, - -0.09997420012950897, - -0.059346627444028854, - -0.017244841903448105, - -0.022032303735613823, - 0.0289973933249712, - 0.01903395541012287, - -0.021869715303182602, - 0.1061856597661972, - -0.08663272857666016, - 0.05268500745296478, - 0.055803705006837845, - -0.015509780496358871, - 0.060663677752017975, - -0.0018153405981138349, - 0.0180708859115839, - -0.03514222428202629, - 0.0321413092315197, - -0.10321121662855148, - 0.013819336891174316, - 0.020531298592686653, - 0.0314667671918869, - 0.04016425460577011, - 0.010570003651082516, - 0.03386283665895462, - 0.01891472563147545, - 0.02168339118361473, - 0.03738851845264435, - -0.17384524643421173, - -0.06058356910943985, - 0.0823650062084198, - 0.01576400175690651, - -0.07598790526390076, - 0.0032587896566838026 - ] - }, - { - "type": "NarrativeText", - "element_id": "3b8e76f2baafa3482edb98626c6fd7aa", - "text": "\"Don't tease! Well, and what has been decided about Novosiltsev's dispatch? 
You know everything.\"", - "metadata": { - "data_source": { - "url": "example-docs/book-war-and-peace-1p.txt", - "permissions_data": [ - { - "mode": 33188 - } - ] - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] - }, - "embeddings": [ - -0.0334237739443779, - 0.08513972163200378, - 0.05262717232108116, - 0.05147742107510567, - 0.008624264039099216, - 0.019226737320423126, - 0.11567427217960358, - 0.0748891681432724, - -0.06722357869148254, - -0.047949645668268204, - -0.011045103892683983, - -0.03488224372267723, - -0.05327503755688667, - -0.024843355640769005, - 0.014417222701013088, - -0.06916353851556778, - 0.048521146178245544, - -0.08373995870351791, - 0.046426765620708466, - 0.025284674018621445, - 0.09441140294075012, - -0.050212543457746506, - 0.12197326123714447, - 0.049122147262096405, - 0.030315903946757317, - 0.05504322424530983, - -0.02652476727962494, - 0.03156885877251625, - 0.016133638098835945, - -0.04025481268763542, - -0.06404859572649002, - -0.02736051380634308, - -0.015811819583177567, - -0.0029734710697084665, - -0.02746821567416191, - 0.058456845581531525, - -0.022867096588015556, - -0.03450412303209305, - 0.07089599967002869, - 0.04768503084778786, - -0.015782726928591728, - -0.02368755266070366, - -0.08858254551887512, - 0.10066483169794083, - -0.0631270483136177, - -0.04608485475182533, - 0.025854069739580154, - 0.009028585627675056, - -0.015042525716125965, - 0.0015956509159877896, - -0.057843416929244995, - -0.09996753185987473, - -0.011185608804225922, - -0.03724578395485878, - 0.08959483355283737, - 0.04921356588602066, - 0.02411816269159317, - -0.05085308849811554, - 0.009689891710877419, - 0.0010730825597420335, - -0.03301776200532913, - -0.0316394679248333, - -0.02160092443227768, - 0.11823239177465439, - -0.01059714239090681, - -0.020074592903256416, - -0.016708754003047943, - 0.0668322965502739, - -0.08344876021146774, - 0.04243999719619751, - -0.0020189890637993813, - -0.03526168316602707, - 0.014384073205292225, - 
-0.0011467061704024673, - -0.016982214525341988, - -0.015549605712294579, - 0.0546073354780674, - 0.0283591840416193, - 0.008411301299929619, - -0.039047304540872574, - 0.08302674442529678, - -0.019280124455690384, - -0.06942667812108994, - 0.004916740581393242, - -0.0011368121486157179, - -0.006244057789444923, - 0.09663217514753342, - -0.104483962059021, - -0.023231064900755882, - 0.023865217342972755, - -0.055118925869464874, - -0.04533776268362999, - -0.01909509487450123, - 0.01215389370918274, - 0.022195113822817802, - 0.10891316086053848, - -0.08467862010002136, - -0.040171630680561066, - -0.019441239535808563, - 0.00880073569715023, - -0.0065575046464800835, - -0.02166094072163105, - -0.0035863975062966347, - 0.038118019700050354, - -0.07967104762792587, - 0.004411518573760986, - -0.04446792230010033, - -0.11413455009460449, - -0.0031046043150126934, - -0.014155177399516106, - -0.03359289839863777, - -0.10191155970096588, - 0.04423774033784866, - -0.01634969376027584, - 0.03867436200380325, - 0.00789119303226471, - -0.0013088815612718463, - 0.005726095754653215, - -0.08642798662185669, - 0.007591322064399719, - 0.057926245033741, - -0.06650466471910477, - 0.03430334851145744, - 0.05817404016852379, - -0.07935775816440582, - 0.03984684497117996, - 0.03623027727007866, - -3.5282815588236206e-33, - 0.09347352385520935, - 0.03490927442908287, - -0.047751061618328094, - 0.05143330991268158, - 0.012835165485739708, - -0.014820020645856857, - 0.03752562031149864, - -0.02287108078598976, - -0.05201808735728264, - 0.10732631385326385, - -0.060552485287189484, - -0.04543081298470497, - -0.023168183863162994, - 0.02206038311123848, - -0.042797885835170746, - 0.06273534893989563, - 0.020183425396680832, - 0.02096334658563137, - 0.03245825693011284, - 0.01664106547832489, - 0.08178989589214325, - 0.047091949731111526, - -0.08770085126161575, - -0.06072268262505531, - 0.011261857114732265, - 0.030453678220510483, - 0.0052911145612597466, - -0.008912337943911552, - 
0.002271136734634638, - 0.0017309295944869518, - -0.09111656248569489, - 0.07851098477840424, - 0.009972517378628254, - -0.005782611668109894, - 0.03086969628930092, - 0.02096429467201233, - -0.07526246458292007, - -0.08612782508134842, - -0.019093088805675507, - -0.008296051993966103, - 0.04687991738319397, - 0.009091454558074474, - -0.06302816420793533, - 0.00597031693905592, - -0.05079472064971924, - 0.011675463989377022, - -0.015259363688528538, - 0.07252682745456696, - 0.04954379051923752, - -0.056695692241191864, - 0.011587153188884258, - 0.05181189253926277, - 0.0008874403429217637, - -0.010242917574942112, - 0.08723440766334534, - 0.02765694633126259, - -0.047034282237291336, - -0.015987541526556015, - 0.11399177461862564, - 0.029004734009504318, - -0.014162846840918064, - -0.038933318108320236, - 0.027395043522119522, - -0.01913175918161869, - -0.03436695784330368, - -0.011862339451909065, - -0.048628758639097214, - 0.024184146896004677, - 0.03001609444618225, - 0.03736414760351181, - -0.06791726499795914, - 0.02303498610854149, - -0.10504088550806046, - -0.025439808145165443, - -0.03434756025671959, - -0.024942820891737938, - 0.00408831937238574, - -0.0030367441941052675, - -0.014883888885378838, - -0.09496494382619858, - 0.03237718343734741, - -0.06538841873407364, - 0.028552798554301262, - 0.10747817903757095, - -0.0029743092600256205, - -0.05891750752925873, - 0.04938989877700806, - -0.056418683379888535, - -0.03881530091166496, - 0.07755856961011887, - -0.05135304108262062, - -0.018850741907954216, - -0.047057341784238815, - 0.06388959288597107, - -0.05305340886116028, - 3.2636787906439814e-34, - 0.05793430283665657, - 0.03429603576660156, - -0.09402100741863251, - 0.013472452759742737, - -0.07261441648006439, - -0.011468267999589443, - -0.0676976814866066, - -0.006202956195920706, - -0.07966680824756622, - -0.05549564212560654, - -0.019214535132050514, - 0.004230211954563856, - 0.04485606402158737, - -0.04262787103652954, - -0.07189714908599854, - 
-0.0037761912681162357, - 0.057648852467536926, - -0.0708005428314209, - 0.05060948431491852, - 0.020890962332487106, - 0.02605673298239708, - -0.024573003873229027, - -0.08855784684419632, - 0.023050373420119286, - -0.00701942341402173, - 0.02311650849878788, - 0.06808691471815109, - -3.30068614857737e-05, - -0.16635796427726746, - 0.03618655353784561, - 0.02362920343875885, - -0.07929817587137222, - -0.04195615276694298, - 0.040910642594099045, - 0.06580492109060287, - 0.09183643758296967, - 0.07680416852235794, - -0.06866258382797241, - -0.027831315994262695, - -0.05698476359248161, - 0.036186590790748596, - -0.015769021585583687, - 0.013524159789085388, - 0.05054645612835884, - 0.01702508144080639, - 0.016595035791397095, - 0.00011453445040388033, - 0.050440166145563126, - 0.0016380517045035958, - -0.014796828851103783, - 0.03619819134473801, - -0.043746598064899445, - -0.05902784317731857, - -0.016839053481817245, - -0.0684865191578865, - 0.028295667842030525, - 0.036887142807245255, - -0.022060928866267204, - 0.07970874011516571, - -0.020057976245880127, - -0.004229525104165077, - 0.03774590790271759, - 0.007963254116475582, - 0.008670439943671227, - 0.018695710226893425, - -0.05416541546583176, - -0.00802430510520935, - -0.045783448964357376, - 0.08465134352445602, - -0.007806641049683094, - 0.01399241667240858, - -0.023009605705738068, - -0.020857185125350952, - -0.0475294403731823, - -0.0401642769575119, - 0.05187416449189186, - 0.0012261562515050173, - -0.14704670011997223, - -0.004782191943377256, - 0.03220463544130325, - 0.028961429372429848, - -0.017328929156064987, - 0.005208924412727356, - 0.039198849350214005, - 0.03868243843317032, - 0.16185303032398224, - -0.003337549977004528, - 0.07442822307348251, - 0.013632482849061489, - 0.010453833267092705, - 0.03932604566216469, - -0.04941476881504059, - 0.026289017871022224, - -0.034113798290491104, - 0.027384193614125252, - -2.633965046072717e-08, - 0.026281708851456642, - 0.049991294741630554, - 
-0.014689208008348942, - -0.009161603637039661, - 0.04411311075091362, - 0.023217033594846725, - -0.12026068568229675, - 0.006136561743915081, - -0.0401003323495388, - -0.034193698316812515, - 0.05503888055682182, - 0.03386237472295761, - 0.07241026312112808, - 0.02967972867190838, - 0.09800057858228683, - 0.07632963359355927, - 0.03416603058576584, - 0.06656737625598907, - -0.07427143305540085, - -0.02448747679591179, - -0.06688807159662247, - 0.027105392888188362, - 0.0028684388380497694, - -0.07122629135847092, - -0.008340755477547646, - 0.013545742258429527, - 0.080804742872715, - 0.0014690610114485025, - 0.058291349560022354, - 0.0001274582464247942, - 0.03562508895993233, - -0.017966819927096367, - -0.02497529238462448, - -0.00837686937302351, - 0.020359273999929428, - 0.09904240816831589, - 0.08526665717363358, - -0.011286655440926552, - 0.13558290898799896, - -0.10274514555931091, - -0.025302665308117867, - 0.05128594487905502, - 0.03121301159262657, - 0.03983764722943306, - -0.12721706926822662, - 0.022358782589435577, - -0.07814671844244003, - -0.06490528583526611, - -0.01744556613266468, - -0.0017374091548845172, - 0.026376785710453987, - -0.024805530905723572, - -0.05245836079120636, - 0.0686231181025505, - 0.011838868260383606, - -0.01298182737082243, - -0.06295139342546463, - -0.026889149099588394, - -0.020131288096308708, - -0.05993514880537987, - 0.0318981297314167, - -0.006793243810534477, - -0.003658904926851392, - -0.019297674298286438 - ] - }, - { - "type": "NarrativeText", - "element_id": "fbc14cba30b1dc3c20bd0bcbb36d7de5", - "text": "\"What can one say about it?\" replied the prince in a cold, listless tone. \"What has been decided? 
They have decided that Buonaparte has burnt his boats, and I believe that we are ready to burn ours.\"", - "metadata": { - "data_source": { - "url": "example-docs/book-war-and-peace-1p.txt", - "permissions_data": [ - { - "mode": 33188 - } - ] - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] - }, - "embeddings": [ - -0.0306203942745924, - 0.1165851280093193, - 0.0684056282043457, - 0.03385866805911064, - -0.011450370773673058, - 0.06567824631929398, - 0.10147761553525925, - -0.027841193601489067, - -0.10256843268871307, - -0.09672969579696655, - -0.11508267372846603, - -0.03473299369215965, - -0.007586787920445204, - -0.09611302614212036, - 0.018453529104590416, - 0.033795565366744995, - -0.003642762079834938, - -0.05235380679368973, - -0.04780542105436325, - 0.08546854555606842, - 0.05943774804472923, - 0.05845273658633232, - -0.010093249380588531, - 0.09116564691066742, - 0.011204483918845654, - 0.04561160132288933, - 0.04551238194108009, - 0.02610761858522892, - -0.04407988116145134, - 0.020904818549752235, - -0.10361950099468231, - -0.006478449329733849, - -0.0042344131506979465, - 0.03857744112610817, - 0.019719019532203674, - -0.011069501750171185, - 0.017892351374030113, - 0.011813894845545292, - -0.011747485026717186, - 0.014486805535852909, - 0.011824664659798145, - -0.02422603964805603, - -0.019338440150022507, - 0.08651665598154068, - 0.006315338890999556, - -0.04796777293086052, - -0.031754713505506516, - -0.03359660878777504, - 0.02698991633951664, - 0.028787758201360703, - -0.0037169456481933594, - 0.03733440116047859, - -0.016810575500130653, - -0.11789623647928238, - 0.0058207125402987, - 0.02434416115283966, - 0.008419899269938469, - -0.00315121584571898, - 0.09586749225854874, - -0.04183383285999298, - 0.00568788405507803, - -0.006997744552791119, - 0.016519788652658463, - 0.08096304535865784, - 0.030177759006619453, - -0.014325330965220928, - 0.013215743005275726, - 0.027095049619674683, - -0.10174217820167542, - 0.022875776514410973, 
- 0.09313110262155533, - 0.007237196434289217, - 0.024886278435587883, - -0.033063821494579315, - -0.08371337503194809, - -0.07932297885417938, - 0.044889695942401886, - -0.03888685256242752, - 0.01924283802509308, - 0.009797284379601479, - -0.017630847170948982, - -0.13485990464687347, - -0.10429353266954422, - 0.02015082538127899, - -0.034101951867341995, - -0.02568935602903366, - 0.07359835505485535, - -0.03787154331803322, - -0.04844769462943077, - 0.03811435401439667, - 0.009081241674721241, - -0.04421189799904823, - -0.023422127589583397, - 0.04396010562777519, - -0.04531797021627426, - 0.1374182254076004, - -0.00552446348592639, - 0.01614023931324482, - 0.02549630217254162, - -0.007060108706355095, - 0.00549334567040205, - 0.039969127625226974, - 0.005201140884310007, - -0.008771786466240883, - -0.07763218134641647, - -0.019097693264484406, - -0.02769896201789379, - -0.03197123482823372, - -0.038363344967365265, - -0.1188419982790947, - -0.04141637310385704, - -0.09703484177589417, - 0.04636283963918686, - -0.012623779475688934, - 0.007494242396205664, - 0.0012106564827263355, - 0.026815541088581085, - -0.024071451276540756, - -0.10793619602918625, - -0.030899010598659515, - 0.0378304198384285, - 0.0586252436041832, - -0.07164493203163147, - 0.06962019205093384, - -0.037138212472200394, - -0.0009952755644917488, - 0.013093151152133942, - -8.43375136057751e-34, - 0.07010412216186523, - 0.05530247464776039, - 0.056457072496414185, - 0.07233106344938278, - 0.041066206991672516, - 0.020495232194662094, - -0.07546687871217728, - 0.009956964291632175, - 0.0041334680281579494, - -0.0011218650033697486, - 0.015150320716202259, - -0.023769546300172806, - -0.04326251149177551, - -0.09433014690876007, - -0.022363299503922462, - -0.042428892105817795, - -0.06810394674539566, - -0.034027501940727234, - 0.014063555747270584, - -0.08960604667663574, - 0.03640619292855263, - 0.1341436207294464, - -0.0325639434158802, - -0.010535547509789467, - -0.06925466656684875, - 
0.001424105721525848, - 0.055096060037612915, - -0.043437667191028595, - -0.009649471379816532, - 0.05104508996009827, - -0.05629602447152138, - 0.02744131162762642, - 0.015394657850265503, - -0.0710221379995346, - -0.010381126776337624, - -0.006378490012139082, - -0.08509371429681778, - 0.004385754466056824, - -0.09540266543626785, - -0.022966789081692696, - -0.023332208395004272, - -0.003187950002029538, - -0.044991519302129745, - 0.0868440568447113, - -0.0010858632158488035, - -0.07022508233785629, - -0.07152992486953735, - 0.06745173782110214, - 0.026167329400777817, - 0.0028156929183751345, - 0.017324190586805344, - -0.013740839436650276, - 0.0315944142639637, - 0.015321983024477959, - 0.09027782827615738, - 0.002379694487899542, - -0.0026804632507264614, - -0.004132779315114021, - 0.07261539995670319, - -0.020885169506072998, - 0.1340552419424057, - -0.013763709925115108, - 0.018531663343310356, - -0.005064078606665134, - 0.01773449033498764, - 0.034849539399147034, - -0.05173833295702934, - 0.021430131047964096, - 0.0334097295999527, - -0.09922835975885391, - -0.021377183496952057, - 0.04216054081916809, - -0.027194513007998466, - 0.023806732147932053, - -0.06812924891710281, - 0.016043059527873993, - -0.010363663546741009, - -0.0026226206682622433, - -0.07356515526771545, - -0.029667679220438004, - -0.06297770887613297, - -0.07508224248886108, - 0.031849104911088943, - 0.05637374147772789, - 0.05295383930206299, - -0.04085122048854828, - 0.013757181353867054, - -0.07462412118911743, - 0.02377920225262642, - 0.09099094569683075, - -0.07036615908145905, - 0.04347921535372734, - 0.030033981427550316, - -0.00618325499817729, - -0.06637343764305115, - -1.1228178775680484e-33, - 0.009592464193701744, - -0.01360669918358326, - 0.028936197981238365, - 0.10430671274662018, - -0.06853879988193512, - -0.07210863381624222, - -0.07098358869552612, - 0.02395089901983738, - -0.021944666281342506, - -0.048914242535829544, - -0.020974956452846527, - -0.04364921152591705, - 
0.102988101541996, - -0.05039966478943825, - -0.010445753112435341, - -0.015687011182308197, - 0.09286481142044067, - -0.006173871457576752, - -0.009836404584348202, - 0.017719466239213943, - -0.02863561362028122, - -0.05707544833421707, - -0.04548691585659981, - -0.026321809738874435, - 0.019869035109877586, - 0.059911780059337616, - 0.029891567304730415, - -0.11091006547212601, - -0.056568246334791183, - -0.02880302630364895, - 0.04321066290140152, - -0.018915170803666115, - -0.029463427141308784, - 0.028913136571645737, - 0.02016356773674488, - 0.046942342072725296, - 0.08859186619520187, - -0.04014616832137108, - 0.030233409255743027, - -0.012781508266925812, - 0.03141099214553833, - -0.10608062148094177, - -0.10153602808713913, - 0.039286546409130096, - -0.004890939686447382, - -0.016299132257699966, - 0.03805619850754738, - 0.027883367612957954, - 0.0879654660820961, - 0.060353875160217285, - 0.0223167072981596, - -0.0018401117995381355, - -0.01965010166168213, - 0.011339123360812664, - -0.011818121187388897, - -0.054191067814826965, - -0.03173193708062172, - -0.03331925347447395, - 0.08423362672328949, - 0.013324874453246593, - 0.0013668700121343136, - 0.03309522196650505, - 0.017510762438178062, - 0.001457185368053615, - 0.02378447726368904, - 0.10100596398115158, - 0.02051946148276329, - 0.05345296114683151, - 0.10831999778747559, - -0.03295895829796791, - 0.004178327973932028, - -0.05156242102384567, - -0.08082640916109085, - 0.051948003470897675, - 0.040467068552970886, - 0.04691655933856964, - -0.024723317474126816, - -0.12952785193920135, - -0.069209024310112, - 0.03425818681716919, - 0.038761891424655914, - -0.027142923325300217, - -0.05694182217121124, - 0.002577257575467229, - 0.09436210244894028, - -0.08792437613010406, - 0.005559555720537901, - -0.006851498037576675, - -0.0009211382130160928, - 0.02207968384027481, - 0.011961853131651878, - -0.06731318682432175, - 0.007805254310369492, - -0.050293032079935074, - 0.01892649568617344, - 
-3.565120465509608e-08, - 0.008370264433324337, - -0.015773314982652664, - -0.024207452312111855, - 0.007023857440799475, - 0.05515003949403763, - -0.010496318340301514, - -0.021936068311333656, - -0.02049190178513527, - -0.012689802795648575, - -0.022893957793712616, - 0.06425683945417404, - 0.002712785266339779, - 0.00456151133403182, - -0.005903101060539484, - 0.04811226576566696, - 0.06165697053074837, - 0.06413765251636505, - -0.1156829372048378, - -0.03321158513426781, - -0.07866068929433823, - -0.0101939607411623, - -0.007599474396556616, - -0.0036520815920084715, - 0.009702208451926708, - -0.046618930995464325, - 0.034715645015239716, - -0.000925147149246186, - 0.04593956097960472, - 0.034810151904821396, - 0.02056424506008625, - 0.04330766201019287, - -0.010698845610022545, - -0.07941724359989166, - 0.02538035623729229, - 0.030375370755791664, - 0.008141190744936466, - 0.01681923121213913, - 0.04162432253360748, - 0.09563546627759933, - -0.12796121835708618, - -0.03156562149524689, - 0.10483846813440323, - -0.049203772097826004, - 0.06143992766737938, - 0.0326172336935997, - -0.027665095403790474, - -0.0672636479139328, - 0.059206586331129074, - -0.0777387022972107, - -0.01384445559233427, - -0.0027378168888390064, - -0.05468805506825447, - 0.10512179881334305, - 0.03607570379972458, - 0.0031893912237137556, - 0.06391050666570663, - 0.0354059599339962, - 0.04482095688581467, - -0.04115289822220802, - 0.004169674590229988, - 0.04572826996445656, - 0.0200749933719635, - -0.010630205273628235, - -0.00033097731648012996 - ] - } -] \ No newline at end of file diff --git a/test_unstructured_ingest/expected-structured-output/local-single-file-basic-chunking/handbook-1p.docx.json b/test_unstructured_ingest/expected-structured-output/local-single-file-basic-chunking/handbook-1p.docx.json deleted file mode 100644 index 8ac98be539..0000000000 --- a/test_unstructured_ingest/expected-structured-output/local-single-file-basic-chunking/handbook-1p.docx.json +++ /dev/null 
@@ -1,702 +0,0 @@ -[ - { - "type": "CompositeElement", - "element_id": "36385872440a208d3521a8a885d5f873", - "text": "US Trustee Handbook\n\nCHAPTER 1\n\nINTRODUCTION\n\nCHAPTER 1 – INTRODUCTION\n\nA.\tPURPOSE", - "metadata": { - "data_source": { - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/handbook-1p.docx", - "permissions_data": [ - { - "mode": 33188 - } - ] - }, - "emphasized_text_contents": [ - "CHAPTER 1", - "INTRODUCTION" - ], - "emphasized_text_tags": [ - "b", - "b" - ], - "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", - "languages": [ - "eng" - ], - "page_number": 1, - "orig_elements": "eJztlclu2zAQhl9F4LnWQq3MLUgDOJfYcJxTYghcRrYQkRQoqnVq5N1LOnHrFD60KArURU/iLJz5Zz4Qetgh6ECCsnUr0EWAUg6UVLyBWORlVRSEMsA8S/OGNHksKPoQIAmWCmqpy98hf6gHPRoOe7sHI9thaLUa6rekhx2SWvhwmiZV9bJyNUbT+XbRRkuIzKgUmOizNk/RqAZrRm5HA+K9YWGw9bGnbtXa+aIwjGBLZd/BRGg+RBuqBNP6aZL0oXNs0Ytr2LQd2Ofeq0C077uWU+tERp+UCHUPaiu7RhtJ7TDRTdNycDdHv5jQyRK90RzcVGotu/AQ8bvYABVg6kZr6z6HBr1pJTXPPqGjaj1Sp9MvAoFao5WXY2Frfeb9XbA042ABgumbbH/rUGi6L+8H+BFUIwipqMgyATnJ85KkFRZl4Q4lY7jiZwsKZL+hQ/vFXfNLqrlW1o39ur+r6eV8eb0IErQ6kWrp+jWN7cN/CPkJos7bO0+tRskcrosgOWb8XfQR2WVrOzgFllUiJiLPuABcVbEoqUhJUUBMY84ZY/8m2Jvb5WL28f5qeTO7PSe273T/FN40L/KKZk2JhaPKGhwXHFjBU4yBNPhs8f4tRL69tuBxxHGSBr+MiBeAS5K4X2CSVYJkOC/LJM3Am2lCsv+IfhPRZfho5/eL+ezu+gSQ1VeZAvFy" - } - }, - { - "type": "CompositeElement", - "element_id": "80f07bed3e4ae2a612e9180c2f1a2f6b", - "text": "The United States Trustee appoints and supervises standing trustees and monitors and supervises cases under chapter 13 of title 11 of the United", - "metadata": { - "data_source": { - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/handbook-1p.docx", - "permissions_data": [ - { - "mode": 33188 - } - ] - }, - "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", - "languages": [ - "eng" - ], - "page_number": 1, - 
"orig_elements": "eJx1VNtu2zgQ/ZWBnhzAka24Sb3t2wYo9qkomvSpLQyKHFlEKFLgJY4R5N97RrbbdN0ChkUN53LmzBl9fa7Y8cA+b6yp3lGlTLe6aa7bFbfLK7NcdlfXev1m/YZNs9b/cFfNqRo4K6Oygv9zJYdNCiVqnt5HjoNNyQafNkenr8/VEIxcr1bNev3yHTlKdFJu0YeBF7F4z3GxC/FhUXzKsehcIpvfXzKnvHlt2Vi/hW1R1wt+UsPo+NIEnRa98qYN4eGyGWsYnqoXFOys47wfeWpyHJ3VKgPk4tGbOozsnwbXhTionC5D11nNiCxCTA1YZoxBM7ry28HVpxvhwim/LQowpM+K/baS7kZYNr4MLUfYG6mf+SlL7fue6Yu3mQ3dZYWe6D6WlJkJqIL1ORHgUyog8tEm3KcMAypTPjgeHIaAJCGeeWsl/8UbjqR7NWY8mxWFjrLNjqlppvMZiltMqCa6WtOX+q6+relbWS7VW7pe38zaC9wI8P+OzM4JIy4IVolGFbOkhATIlGx/Vv9TqjlhYKp1NvXwC4DoVLSdBAmkMSQrU/kzxBNRn2LYRjXQ7Hi4IESI+7F82CGm3ZM6o45yODhyK9zNSUNG9nAMuIhTN5LDevzAHeDOJ4r/Cud/1JAJuPMh04hgqAQwuuLclEQHUWlmAZantTt16tTuPdlMqQ/FmSm+ZSrpwDE6KW3CAAtiIVNyvFWOpICKup9yK6/cPtl0xHPW+oCH5OzUYJ1VkXY290jh+FF5QRsgIFnbE6R/lX+IZcx6P4ljTh8YY0XZz8Xx5PXK45MsiMFW0my6vhDW9uSCRkD7yy/K5YFRUar0DcAQ5e9SWV01878ICNbb+kP9+ZW1vpmpi9lKVHp3vizAxF5DnlhKIwrognNhB8hKZyw63dsRAzeYhijRyOxzb9OvkbYlCyOAK8lkNho8IQLqoQFeChLa1/I9OH1iPqoY8YF55HvZ+5fvPwApvt/g" - } - }, - { - "type": "CompositeElement", - "element_id": "91d26c5ec7f727ece12679cf6b80f90d", - "text": "le 11 of the United States Code. 28 U.S.C. § 586(b). The Handbook, issued as part of our duties under 28 U.S.C. 
§ 586, establishes or clarifies the", - "metadata": { - "data_source": { - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/handbook-1p.docx", - "permissions_data": [ - { - "mode": 33188 - } - ] - }, - "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", - "languages": [ - "eng" - ], - "page_number": 1, - "orig_elements": "eJx1VNtu2zgQ/ZWBnhzAka24Sb3t2wYo9qkomvSpLQyKHFlEKFLgJY4R5N97RrbbdN0ChkUN53LmzBl9fa7Y8cA+b6yp3lGlTLe6aa7bFbfLK7NcdlfXev1m/YZNs9b/cFfNqRo4K6Oygv9zJYdNCiVqnt5HjoNNyQafNkenr8/VEIxcr1bNev3yHTlKdFJu0YeBF7F4z3GxC/FhUXzKsehcIpvfXzKnvHlt2Vi/hW1R1wt+UsPo+NIEnRa98qYN4eGyGWsYnqoXFOys47wfeWpyHJ3VKgPk4tGbOozsnwbXhTionC5D11nNiCxCTA1YZoxBM7ry28HVpxvhwim/LQowpM+K/baS7kZYNr4MLUfYG6mf+SlL7fue6Yu3mQ3dZYWe6D6WlJkJqIL1ORHgUyog8tEm3KcMAypTPjgeHIaAJCGeeWsl/8UbjqR7NWY8mxWFjrLNjqlppvMZiltMqCa6WtOX+q6+relbWS7VW7pe38zaC9wI8P+OzM4JIy4IVolGFbOkhATIlGx/Vv9TqjlhYKp1NvXwC4DoVLSdBAmkMSQrU/kzxBNRn2LYRjXQ7Hi4IESI+7F82CGm3ZM6o45yODhyK9zNSUNG9nAMuIhTN5LDevzAHeDOJ4r/Cud/1JAJuPMh04hgqAQwuuLclEQHUWlmAZantTt16tTuPdlMqQ/FmSm+ZSrpwDE6KW3CAAtiIVNyvFWOpICKup9yK6/cPtl0xHPW+oCH5OzUYJ1VkXY290jh+FF5QRsgIFnbE6R/lX+IZcx6P4ljTh8YY0XZz8Xx5PXK45MsiMFW0my6vhDW9uSCRkD7yy/K5YFRUar0DcAQ5e9SWV01878ICNbb+kP9+ZW1vpmpi9lKVHp3vizAxF5DnlhKIwrognNhB8hKZyw63dsRAzeYhijRyOxzb9OvkbYlCyOAK8lkNho8IQLqoQFeChLa1/I9OH1iPqoY8YF55HvZ+5fvPwApvt/g", - "is_continuation": true - } - }, - { - "type": "CompositeElement", - "element_id": "a9012d27166d14371c4c502ef0b949e1", - "text": "es or clarifies the position of the United States Trustee Program (Program) on the duties owed by a standing trustee to the debtors, creditors, other", - "metadata": { - "data_source": { - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/handbook-1p.docx", - "permissions_data": [ - { - "mode": 33188 - } - ] - }, - "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", - "languages": [ - "eng" - ], 
- "page_number": 1, - "orig_elements": "eJx1VNtu2zgQ/ZWBnhzAka24Sb3t2wYo9qkomvSpLQyKHFlEKFLgJY4R5N97RrbbdN0ChkUN53LmzBl9fa7Y8cA+b6yp3lGlTLe6aa7bFbfLK7NcdlfXev1m/YZNs9b/cFfNqRo4K6Oygv9zJYdNCiVqnt5HjoNNyQafNkenr8/VEIxcr1bNev3yHTlKdFJu0YeBF7F4z3GxC/FhUXzKsehcIpvfXzKnvHlt2Vi/hW1R1wt+UsPo+NIEnRa98qYN4eGyGWsYnqoXFOys47wfeWpyHJ3VKgPk4tGbOozsnwbXhTionC5D11nNiCxCTA1YZoxBM7ry28HVpxvhwim/LQowpM+K/baS7kZYNr4MLUfYG6mf+SlL7fue6Yu3mQ3dZYWe6D6WlJkJqIL1ORHgUyog8tEm3KcMAypTPjgeHIaAJCGeeWsl/8UbjqR7NWY8mxWFjrLNjqlppvMZiltMqCa6WtOX+q6+relbWS7VW7pe38zaC9wI8P+OzM4JIy4IVolGFbOkhATIlGx/Vv9TqjlhYKp1NvXwC4DoVLSdBAmkMSQrU/kzxBNRn2LYRjXQ7Hi4IESI+7F82CGm3ZM6o45yODhyK9zNSUNG9nAMuIhTN5LDevzAHeDOJ4r/Cud/1JAJuPMh04hgqAQwuuLclEQHUWlmAZantTt16tTuPdlMqQ/FmSm+ZSrpwDE6KW3CAAtiIVNyvFWOpICKup9yK6/cPtl0xHPW+oCH5OzUYJ1VkXY290jh+FF5QRsgIFnbE6R/lX+IZcx6P4ljTh8YY0XZz8Xx5PXK45MsiMFW0my6vhDW9uSCRkD7yy/K5YFRUar0DcAQ5e9SWV01878ICNbb+kP9+ZW1vpmpi9lKVHp3vizAxF5DnlhKIwrognNhB8hKZyw63dsRAzeYhijRyOxzb9OvkbYlCyOAK8lkNho8IQLqoQFeChLa1/I9OH1iPqoY8YF55HvZ+5fvPwApvt/g", - "is_continuation": true - } - }, - { - "type": "CompositeElement", - "element_id": "20447c8f42ed2b919bd0e5707e7899ae", - "text": "s, creditors, other parties in interest, and the United States Trustee. 
The Handbook does not present a full and complete statement of the law; it", - "metadata": { - "data_source": { - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/handbook-1p.docx", - "permissions_data": [ - { - "mode": 33188 - } - ] - }, - "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", - "languages": [ - "eng" - ], - "page_number": 1, - "orig_elements": "eJx1VNtu2zgQ/ZWBnhzAka24Sb3t2wYo9qkomvSpLQyKHFlEKFLgJY4R5N97RrbbdN0ChkUN53LmzBl9fa7Y8cA+b6yp3lGlTLe6aa7bFbfLK7NcdlfXev1m/YZNs9b/cFfNqRo4K6Oygv9zJYdNCiVqnt5HjoNNyQafNkenr8/VEIxcr1bNev3yHTlKdFJu0YeBF7F4z3GxC/FhUXzKsehcIpvfXzKnvHlt2Vi/hW1R1wt+UsPo+NIEnRa98qYN4eGyGWsYnqoXFOys47wfeWpyHJ3VKgPk4tGbOozsnwbXhTionC5D11nNiCxCTA1YZoxBM7ry28HVpxvhwim/LQowpM+K/baS7kZYNr4MLUfYG6mf+SlL7fue6Yu3mQ3dZYWe6D6WlJkJqIL1ORHgUyog8tEm3KcMAypTPjgeHIaAJCGeeWsl/8UbjqR7NWY8mxWFjrLNjqlppvMZiltMqCa6WtOX+q6+relbWS7VW7pe38zaC9wI8P+OzM4JIy4IVolGFbOkhATIlGx/Vv9TqjlhYKp1NvXwC4DoVLSdBAmkMSQrU/kzxBNRn2LYRjXQ7Hi4IESI+7F82CGm3ZM6o45yODhyK9zNSUNG9nAMuIhTN5LDevzAHeDOJ4r/Cud/1JAJuPMh04hgqAQwuuLclEQHUWlmAZantTt16tTuPdlMqQ/FmSm+ZSrpwDE6KW3CAAtiIVNyvFWOpICKup9yK6/cPtl0xHPW+oCH5OzUYJ1VkXY290jh+FF5QRsgIFnbE6R/lX+IZcx6P4ljTh8YY0XZz8Xx5PXK45MsiMFW0my6vhDW9uSCRkD7yy/K5YFRUar0DcAQ5e9SWV01878ICNbb+kP9+ZW1vpmpi9lKVHp3vizAxF5DnlhKIwrognNhB8hKZyw63dsRAzeYhijRyOxzb9OvkbYlCyOAK8lkNho8IQLqoQFeChLa1/I9OH1iPqoY8YF55HvZ+5fvPwApvt/g", - "is_continuation": true - } - }, - { - "type": "CompositeElement", - "element_id": "e34c56af21b43f4179f996ddea901bc4", - "text": "ment of the law; it should not be used as a substitute for legal research and analysis. 
The standing trustee must be familiar with relevant", - "metadata": { - "data_source": { - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/handbook-1p.docx", - "permissions_data": [ - { - "mode": 33188 - } - ] - }, - "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", - "languages": [ - "eng" - ], - "page_number": 1, - "orig_elements": "eJx1VNtu2zgQ/ZWBnhzAka24Sb3t2wYo9qkomvSpLQyKHFlEKFLgJY4R5N97RrbbdN0ChkUN53LmzBl9fa7Y8cA+b6yp3lGlTLe6aa7bFbfLK7NcdlfXev1m/YZNs9b/cFfNqRo4K6Oygv9zJYdNCiVqnt5HjoNNyQafNkenr8/VEIxcr1bNev3yHTlKdFJu0YeBF7F4z3GxC/FhUXzKsehcIpvfXzKnvHlt2Vi/hW1R1wt+UsPo+NIEnRa98qYN4eGyGWsYnqoXFOys47wfeWpyHJ3VKgPk4tGbOozsnwbXhTionC5D11nNiCxCTA1YZoxBM7ry28HVpxvhwim/LQowpM+K/baS7kZYNr4MLUfYG6mf+SlL7fue6Yu3mQ3dZYWe6D6WlJkJqIL1ORHgUyog8tEm3KcMAypTPjgeHIaAJCGeeWsl/8UbjqR7NWY8mxWFjrLNjqlppvMZiltMqCa6WtOX+q6+relbWS7VW7pe38zaC9wI8P+OzM4JIy4IVolGFbOkhATIlGx/Vv9TqjlhYKp1NvXwC4DoVLSdBAmkMSQrU/kzxBNRn2LYRjXQ7Hi4IESI+7F82CGm3ZM6o45yODhyK9zNSUNG9nAMuIhTN5LDevzAHeDOJ4r/Cud/1JAJuPMh04hgqAQwuuLclEQHUWlmAZantTt16tTuPdlMqQ/FmSm+ZSrpwDE6KW3CAAtiIVNyvFWOpICKup9yK6/cPtl0xHPW+oCH5OzUYJ1VkXY290jh+FF5QRsgIFnbE6R/lX+IZcx6P4ljTh8YY0XZz8Xx5PXK45MsiMFW0my6vhDW9uSCRkD7yy/K5YFRUar0DcAQ5e9SWV01878ICNbb+kP9+ZW1vpmpi9lKVHp3vizAxF5DnlhKIwrognNhB8hKZyw63dsRAzeYhijRyOxzb9OvkbYlCyOAK8lkNho8IQLqoQFeChLa1/I9OH1iPqoY8YF55HvZ+5fvPwApvt/g", - "is_continuation": true - } - }, - { - "type": "CompositeElement", - "element_id": "55e660e5b0d0ec6ee5476621e556d6c8", - "text": "iliar with relevant provisions of the Bankruptcy Code, Federal Rules of Bankruptcy Procedure (Rules), any local bankruptcy rules, and case law. 
11", - "metadata": { - "data_source": { - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/handbook-1p.docx", - "permissions_data": [ - { - "mode": 33188 - } - ] - }, - "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", - "languages": [ - "eng" - ], - "page_number": 1, - "orig_elements": "eJx1VNtu2zgQ/ZWBnhzAka24Sb3t2wYo9qkomvSpLQyKHFlEKFLgJY4R5N97RrbbdN0ChkUN53LmzBl9fa7Y8cA+b6yp3lGlTLe6aa7bFbfLK7NcdlfXev1m/YZNs9b/cFfNqRo4K6Oygv9zJYdNCiVqnt5HjoNNyQafNkenr8/VEIxcr1bNev3yHTlKdFJu0YeBF7F4z3GxC/FhUXzKsehcIpvfXzKnvHlt2Vi/hW1R1wt+UsPo+NIEnRa98qYN4eGyGWsYnqoXFOys47wfeWpyHJ3VKgPk4tGbOozsnwbXhTionC5D11nNiCxCTA1YZoxBM7ry28HVpxvhwim/LQowpM+K/baS7kZYNr4MLUfYG6mf+SlL7fue6Yu3mQ3dZYWe6D6WlJkJqIL1ORHgUyog8tEm3KcMAypTPjgeHIaAJCGeeWsl/8UbjqR7NWY8mxWFjrLNjqlppvMZiltMqCa6WtOX+q6+relbWS7VW7pe38zaC9wI8P+OzM4JIy4IVolGFbOkhATIlGx/Vv9TqjlhYKp1NvXwC4DoVLSdBAmkMSQrU/kzxBNRn2LYRjXQ7Hi4IESI+7F82CGm3ZM6o45yODhyK9zNSUNG9nAMuIhTN5LDevzAHeDOJ4r/Cud/1JAJuPMh04hgqAQwuuLclEQHUWlmAZantTt16tTuPdlMqQ/FmSm+ZSrpwDE6KW3CAAtiIVNyvFWOpICKup9yK6/cPtl0xHPW+oCH5OzUYJ1VkXY290jh+FF5QRsgIFnbE6R/lX+IZcx6P4ljTh8YY0XZz8Xx5PXK45MsiMFW0my6vhDW9uSCRkD7yy/K5YFRUar0DcAQ5e9SWV01878ICNbb+kP9+ZW1vpmpi9lKVHp3vizAxF5DnlhKIwrognNhB8hKZyw63dsRAzeYhijRyOxzb9OvkbYlCyOAK8lkNho8IQLqoQFeChLa1/I9OH1iPqoY8YF55HvZ+5fvPwApvt/g", - "is_continuation": true - } - }, - { - "type": "CompositeElement", - "element_id": "a9335be161a6a7a080ff78e4e07cbadb", - "text": ", and case law. 11 U.S.C. § 321, 28 U.S.C. § 586, 28 C.F.R. § 58.6(a)(3). 
Standing trustees are encouraged to follow Practice Tips identified in", - "metadata": { - "data_source": { - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/handbook-1p.docx", - "permissions_data": [ - { - "mode": 33188 - } - ] - }, - "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", - "languages": [ - "eng" - ], - "page_number": 1, - "orig_elements": "eJx1VNtu2zgQ/ZWBnhzAka24Sb3t2wYo9qkomvSpLQyKHFlEKFLgJY4R5N97RrbbdN0ChkUN53LmzBl9fa7Y8cA+b6yp3lGlTLe6aa7bFbfLK7NcdlfXev1m/YZNs9b/cFfNqRo4K6Oygv9zJYdNCiVqnt5HjoNNyQafNkenr8/VEIxcr1bNev3yHTlKdFJu0YeBF7F4z3GxC/FhUXzKsehcIpvfXzKnvHlt2Vi/hW1R1wt+UsPo+NIEnRa98qYN4eGyGWsYnqoXFOys47wfeWpyHJ3VKgPk4tGbOozsnwbXhTionC5D11nNiCxCTA1YZoxBM7ry28HVpxvhwim/LQowpM+K/baS7kZYNr4MLUfYG6mf+SlL7fue6Yu3mQ3dZYWe6D6WlJkJqIL1ORHgUyog8tEm3KcMAypTPjgeHIaAJCGeeWsl/8UbjqR7NWY8mxWFjrLNjqlppvMZiltMqCa6WtOX+q6+relbWS7VW7pe38zaC9wI8P+OzM4JIy4IVolGFbOkhATIlGx/Vv9TqjlhYKp1NvXwC4DoVLSdBAmkMSQrU/kzxBNRn2LYRjXQ7Hi4IESI+7F82CGm3ZM6o45yODhyK9zNSUNG9nAMuIhTN5LDevzAHeDOJ4r/Cud/1JAJuPMh04hgqAQwuuLclEQHUWlmAZantTt16tTuPdlMqQ/FmSm+ZSrpwDE6KW3CAAtiIVNyvFWOpICKup9yK6/cPtl0xHPW+oCH5OzUYJ1VkXY290jh+FF5QRsgIFnbE6R/lX+IZcx6P4ljTh8YY0XZz8Xx5PXK45MsiMFW0my6vhDW9uSCRkD7yy/K5YFRUar0DcAQ5e9SWV01878ICNbb+kP9+ZW1vpmpi9lKVHp3vizAxF5DnlhKIwrognNhB8hKZyw63dsRAzeYhijRyOxzb9OvkbYlCyOAK8lkNho8IQLqoQFeChLa1/I9OH1iPqoY8YF55HvZ+5fvPwApvt/g", - "is_continuation": true - } - }, - { - "type": "CompositeElement", - "element_id": "5f2d61a46e9d16ce346eacc25321a250", - "text": "Tips identified in this Handbook but these are not considered mandatory.", - "metadata": { - "data_source": { - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/handbook-1p.docx", - "permissions_data": [ - { - "mode": 33188 - } - ] - }, - "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", - "languages": [ - "eng" - ], - "page_number": 1, - "orig_elements": 
"eJx1VNtu2zgQ/ZWBnhzAka24Sb3t2wYo9qkomvSpLQyKHFlEKFLgJY4R5N97RrbbdN0ChkUN53LmzBl9fa7Y8cA+b6yp3lGlTLe6aa7bFbfLK7NcdlfXev1m/YZNs9b/cFfNqRo4K6Oygv9zJYdNCiVqnt5HjoNNyQafNkenr8/VEIxcr1bNev3yHTlKdFJu0YeBF7F4z3GxC/FhUXzKsehcIpvfXzKnvHlt2Vi/hW1R1wt+UsPo+NIEnRa98qYN4eGyGWsYnqoXFOys47wfeWpyHJ3VKgPk4tGbOozsnwbXhTionC5D11nNiCxCTA1YZoxBM7ry28HVpxvhwim/LQowpM+K/baS7kZYNr4MLUfYG6mf+SlL7fue6Yu3mQ3dZYWe6D6WlJkJqIL1ORHgUyog8tEm3KcMAypTPjgeHIaAJCGeeWsl/8UbjqR7NWY8mxWFjrLNjqlppvMZiltMqCa6WtOX+q6+relbWS7VW7pe38zaC9wI8P+OzM4JIy4IVolGFbOkhATIlGx/Vv9TqjlhYKp1NvXwC4DoVLSdBAmkMSQrU/kzxBNRn2LYRjXQ7Hi4IESI+7F82CGm3ZM6o45yODhyK9zNSUNG9nAMuIhTN5LDevzAHeDOJ4r/Cud/1JAJuPMh04hgqAQwuuLclEQHUWlmAZantTt16tTuPdlMqQ/FmSm+ZSrpwDE6KW3CAAtiIVNyvFWOpICKup9yK6/cPtl0xHPW+oCH5OzUYJ1VkXY290jh+FF5QRsgIFnbE6R/lX+IZcx6P4ljTh8YY0XZz8Xx5PXK45MsiMFW0my6vhDW9uSCRkD7yy/K5YFRUar0DcAQ5e9SWV01878ICNbb+kP9+ZW1vpmpi9lKVHp3vizAxF5DnlhKIwrognNhB8hKZyw63dsRAzeYhijRyOxzb9OvkbYlCyOAK8lkNho8IQLqoQFeChLa1/I9OH1iPqoY8YF55HvZ+5fvPwApvt/g", - "is_continuation": true - } - }, - { - "type": "CompositeElement", - "element_id": "0a43a9535c06cfbe46bebddcdd5a46e0", - "text": "Nothing in this Handbook should be construed to excuse the standing trustee from complying with all duties imposed by the Bankruptcy Code and Rules,", - "metadata": { - "data_source": { - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/handbook-1p.docx", - "permissions_data": [ - { - "mode": 33188 - } - ] - }, - "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", - "languages": [ - "eng" - ], - "page_number": 1, - "orig_elements": 
"eJyVUk1z2jAQ/Ss7PoONQyjQW5tLTzk05JRmPLK0xhpkyZVWgIfhv3dlSIZmMp3pTdqv996+fTllaLBDS5VW2VfIlvN6/UWu52op1qtmreq7+8XiTizum/liWa9m2QSyDkkoQYLrT1l6VMFFL3H89+g7HYJ2NlTXopdT1jmV0vN5uVqdX3lG9CbBFa3rsPDRWvTFwfldEW0gHyVFj+rvD2Gg6jZSabvlWJHnBR5F1xucKidD0Qqraud207LPOXDMzgzYaIM09IlFJvreaCmISRZ7q3LXoz12pnG+ExSmrmm0RO6MaTE501K9dxJZld12Jn/LpF0YYbdRMI2kM0O7zZK6niOVjV2NnuNlwic8UsJ+dNTyGNAW+BHgx5UshNZFo6BGkG4UiQrIAR5lDMi1CIG4NvVyMhAiNN51XM3KhxQ+aGpBGAMqksYAuutd4Cn1MLZ/F3bnY09ygAe2A3gY/IwGwwSMk8KAv3xSnCWjD+CasVOyvZQDbD4jceVtHenmAvRsNTHsEwm2DDbXukOLFvfoxxLe516nI3mDeF8Di2/YHAoXOSl3w46J/Q833q+QXGFJ1Ib3xd03+wlIKcQoH82YQB0JLOKoC/img7PcOaRnOhNe0pDGDBAt02I2zMUfNDulmQLfFiom9Y3BmBRFcn4Ajw16tDJZ89H9MZf8/sSpCZQlPOdP+UMOv+JsJpZQzkpg9gF/55N/UUgn+nb1j8J7vvk9btIpnl//AAF6axU=" - } - }, - { - "type": "CompositeElement", - "element_id": "2ff156994a8c58d8a5c91918a543ec28", - "text": "tcy Code and Rules, local rules, and orders of the court. The standing trustee should notify the United States Trustee whenever the provision of the", - "metadata": { - "data_source": { - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/handbook-1p.docx", - "permissions_data": [ - { - "mode": 33188 - } - ] - }, - "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", - "languages": [ - "eng" - ], - "page_number": 1, - "orig_elements": 
"eJyVUk1z2jAQ/Ss7PoONQyjQW5tLTzk05JRmPLK0xhpkyZVWgIfhv3dlSIZmMp3pTdqv996+fTllaLBDS5VW2VfIlvN6/UWu52op1qtmreq7+8XiTizum/liWa9m2QSyDkkoQYLrT1l6VMFFL3H89+g7HYJ2NlTXopdT1jmV0vN5uVqdX3lG9CbBFa3rsPDRWvTFwfldEW0gHyVFj+rvD2Gg6jZSabvlWJHnBR5F1xucKidD0Qqraud207LPOXDMzgzYaIM09IlFJvreaCmISRZ7q3LXoz12pnG+ExSmrmm0RO6MaTE501K9dxJZld12Jn/LpF0YYbdRMI2kM0O7zZK6niOVjV2NnuNlwic8UsJ+dNTyGNAW+BHgx5UshNZFo6BGkG4UiQrIAR5lDMi1CIG4NvVyMhAiNN51XM3KhxQ+aGpBGAMqksYAuutd4Cn1MLZ/F3bnY09ygAe2A3gY/IwGwwSMk8KAv3xSnCWjD+CasVOyvZQDbD4jceVtHenmAvRsNTHsEwm2DDbXukOLFvfoxxLe516nI3mDeF8Di2/YHAoXOSl3w46J/Q833q+QXGFJ1Ib3xd03+wlIKcQoH82YQB0JLOKoC/img7PcOaRnOhNe0pDGDBAt02I2zMUfNDulmQLfFiom9Y3BmBRFcn4Ajw16tDJZ89H9MZf8/sSpCZQlPOdP+UMOv+JsJpZQzkpg9gF/55N/UUgn+nb1j8J7vvk9btIpnl//AAF6axU=", - "is_continuation": true - } - }, - { - "type": "CompositeElement", - "element_id": "7c43851f864b7ccc35150c93d06abe80", - "text": "he provision of the Handbook conflicts with the local rules or orders of the court. The standing trustee is accountable for all duties set forth in", - "metadata": { - "data_source": { - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/handbook-1p.docx", - "permissions_data": [ - { - "mode": 33188 - } - ] - }, - "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", - "languages": [ - "eng" - ], - "page_number": 1, - "orig_elements": 
"eJyVUk1z2jAQ/Ss7PoONQyjQW5tLTzk05JRmPLK0xhpkyZVWgIfhv3dlSIZmMp3pTdqv996+fTllaLBDS5VW2VfIlvN6/UWu52op1qtmreq7+8XiTizum/liWa9m2QSyDkkoQYLrT1l6VMFFL3H89+g7HYJ2NlTXopdT1jmV0vN5uVqdX3lG9CbBFa3rsPDRWvTFwfldEW0gHyVFj+rvD2Gg6jZSabvlWJHnBR5F1xucKidD0Qqraud207LPOXDMzgzYaIM09IlFJvreaCmISRZ7q3LXoz12pnG+ExSmrmm0RO6MaTE501K9dxJZld12Jn/LpF0YYbdRMI2kM0O7zZK6niOVjV2NnuNlwic8UsJ+dNTyGNAW+BHgx5UshNZFo6BGkG4UiQrIAR5lDMi1CIG4NvVyMhAiNN51XM3KhxQ+aGpBGAMqksYAuutd4Cn1MLZ/F3bnY09ygAe2A3gY/IwGwwSMk8KAv3xSnCWjD+CasVOyvZQDbD4jceVtHenmAvRsNTHsEwm2DDbXukOLFvfoxxLe516nI3mDeF8Di2/YHAoXOSl3w46J/Q833q+QXGFJ1Ib3xd03+wlIKcQoH82YQB0JLOKoC/img7PcOaRnOhNe0pDGDBAt02I2zMUfNDulmQLfFiom9Y3BmBRFcn4Ajw16tDJZ89H9MZf8/sSpCZQlPOdP+UMOv+JsJpZQzkpg9gF/55N/UUgn+nb1j8J7vvk9btIpnl//AAF6axU=", - "is_continuation": true - } - }, - { - "type": "CompositeElement", - "element_id": "7caf69b806daa033d686fae6100f4d7c", - "text": "duties set forth in this Handbook, but need not personally perform any duty unless otherwise indicated. All statutory references in this Handbook", - "metadata": { - "data_source": { - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/handbook-1p.docx", - "permissions_data": [ - { - "mode": 33188 - } - ] - }, - "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", - "languages": [ - "eng" - ], - "page_number": 1, - "orig_elements": 
"eJyVUk1z2jAQ/Ss7PoONQyjQW5tLTzk05JRmPLK0xhpkyZVWgIfhv3dlSIZmMp3pTdqv996+fTllaLBDS5VW2VfIlvN6/UWu52op1qtmreq7+8XiTizum/liWa9m2QSyDkkoQYLrT1l6VMFFL3H89+g7HYJ2NlTXopdT1jmV0vN5uVqdX3lG9CbBFa3rsPDRWvTFwfldEW0gHyVFj+rvD2Gg6jZSabvlWJHnBR5F1xucKidD0Qqraud207LPOXDMzgzYaIM09IlFJvreaCmISRZ7q3LXoz12pnG+ExSmrmm0RO6MaTE501K9dxJZld12Jn/LpF0YYbdRMI2kM0O7zZK6niOVjV2NnuNlwic8UsJ+dNTyGNAW+BHgx5UshNZFo6BGkG4UiQrIAR5lDMi1CIG4NvVyMhAiNN51XM3KhxQ+aGpBGAMqksYAuutd4Cn1MLZ/F3bnY09ygAe2A3gY/IwGwwSMk8KAv3xSnCWjD+CasVOyvZQDbD4jceVtHenmAvRsNTHsEwm2DDbXukOLFvfoxxLe516nI3mDeF8Di2/YHAoXOSl3w46J/Q833q+QXGFJ1Ib3xd03+wlIKcQoH82YQB0JLOKoC/img7PcOaRnOhNe0pDGDBAt02I2zMUfNDulmQLfFiom9Y3BmBRFcn4Ajw16tDJZ89H9MZf8/sSpCZQlPOdP+UMOv+JsJpZQzkpg9gF/55N/UUgn+nb1j8J7vvk9btIpnl//AAF6axU=", - "is_continuation": true - } - }, - { - "type": "CompositeElement", - "element_id": "f4412be8c7b2624c729af85c85b3a0e4", - "text": "es in this Handbook refer to the Bankruptcy Code, 11 U.S.C. § 101 et seq., unless otherwise indicated.", - "metadata": { - "data_source": { - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/handbook-1p.docx", - "permissions_data": [ - { - "mode": 33188 - } - ] - }, - "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", - "languages": [ - "eng" - ], - "page_number": 1, - "orig_elements": 
"eJyVUk1z2jAQ/Ss7PoONQyjQW5tLTzk05JRmPLK0xhpkyZVWgIfhv3dlSIZmMp3pTdqv996+fTllaLBDS5VW2VfIlvN6/UWu52op1qtmreq7+8XiTizum/liWa9m2QSyDkkoQYLrT1l6VMFFL3H89+g7HYJ2NlTXopdT1jmV0vN5uVqdX3lG9CbBFa3rsPDRWvTFwfldEW0gHyVFj+rvD2Gg6jZSabvlWJHnBR5F1xucKidD0Qqraud207LPOXDMzgzYaIM09IlFJvreaCmISRZ7q3LXoz12pnG+ExSmrmm0RO6MaTE501K9dxJZld12Jn/LpF0YYbdRMI2kM0O7zZK6niOVjV2NnuNlwic8UsJ+dNTyGNAW+BHgx5UshNZFo6BGkG4UiQrIAR5lDMi1CIG4NvVyMhAiNN51XM3KhxQ+aGpBGAMqksYAuutd4Cn1MLZ/F3bnY09ygAe2A3gY/IwGwwSMk8KAv3xSnCWjD+CasVOyvZQDbD4jceVtHenmAvRsNTHsEwm2DDbXukOLFvfoxxLe516nI3mDeF8Di2/YHAoXOSl3w46J/Q833q+QXGFJ1Ib3xd03+wlIKcQoH82YQB0JLOKoC/img7PcOaRnOhNe0pDGDBAt02I2zMUfNDulmQLfFiom9Y3BmBRFcn4Ajw16tDJZ89H9MZf8/sSpCZQlPOdP+UMOv+JsJpZQzkpg9gF/55N/UUgn+nb1j8J7vvk9btIpnl//AAF6axU=", - "is_continuation": true - } - }, - { - "type": "CompositeElement", - "element_id": "c6d218f7643bbbc7b4eae0712e8fd659", - "text": "This Handbook does not create additional rights against the standing trustee or United States Trustee in favor of other parties.", - "metadata": { - "data_source": { - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/handbook-1p.docx", - "permissions_data": [ - { - "mode": 33188 - } - ] - }, - "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", - "languages": [ - "eng" - ], - "page_number": 1, - "orig_elements": "eJztksuO2zAMRX9F0LrxI7GTuMuuuioGmHQ1GBi0RdlCrAckOk0R5N9LFRkg7aY/0J10SYqHV3y7SVzQoqPeKPlZSMRDXe0HqNS2qQ5d1VS60ti1Xa1hB0MjPwlpkUABAeffZD70ya9xxN/3gNGalIx3qX8kvd2k9SqHd7v6eLy/8xtrXHK7cvYWy7g6h7H84eO5XF2iuI60RlR/XggT9c9Kb9zEWlkUJV7BhgU3yo+pnMGpwfvzpg4FC1d554baLEg/Q6aQEMJiRiCGLC9OFT6gu9pF+2iB0sZrbUbkyjUbUzCWCtGPyFO5yS7FRyR7sYCbVmCMPKdEN8k8XWCld6sdMLJe5/6EV8q9T7NJ4uuDUCiPSThPYowIhAKUMhkLFhHNNFMSMIHhmQXNKBJxHTMItiARovBRfHeGUIlX4vIkTo+AcULDhcNeC8+lUQSIZDAVGfrDh28QI7twwVOGY8q/t6HrKrU/Vu1h2O6PWjW62TaDaqAdh7ZV3e7/NvxzG57//tn7F077wp9+lvf3Xx/JOx8=" - } - }, - { - "type": "CompositeElement", - "element_id": 
"66ff9b9385d511ca7e71f1e6852d3221", - "text": "B.\tROLE OF THE UNITED STATES TRUSTEE", - "metadata": { - "data_source": { - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/handbook-1p.docx", - "permissions_data": [ - { - "mode": 33188 - } - ] - }, - "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", - "languages": [ - "eng" - ], - "page_number": 2, - "orig_elements": "eJxVkFtLAzEQhf/Kkmeb2G4Lu74priiIhXb7VMuSTWa3obmRi66U/ncTsaBvmW8mc86Z/RmBBAU6dIKjuwItS8Yp53VfLnpez5dsUfKh7qu6qlbsdlWimwIpCJTTQNP8GeVH5010DH5qC04J74XRvvsd2p+RMjy3y3JeVZdD2hGdzHLkaBQQF7UGRz6NO5GofXCRheiA/y8C+ND9JZ3QY2IEYwITVVbCjBvmyZFq3htzms0tTmBClyQ4CAnhy2YXiForBaMhmSQfmmNjQU9KDsYpGvzMDINgkH7GfBicbHHrDIOUSo9K4msn30JSPUaabOScCPSIcjqbSKej6sElvsj6AaaQtR/we9isX5ti/VS0z02xe3tpm8di2963zbZoN7tt2zR589VsK4IEdDl8AzVOkRk=" - } - }, - { - "type": "CompositeElement", - "element_id": "1876c502fcbb25fd7b978417aea8dded", - "text": "The Bankruptcy Reform Act of 1978 removed the bankruptcy judge from the responsibilities for daytoday administration of cases. 
Debtors, creditors,", - "metadata": { - "data_source": { - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/handbook-1p.docx", - "permissions_data": [ - { - "mode": 33188 - } - ] - }, - "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", - "languages": [ - "eng" - ], - "page_number": 2, - "orig_elements": "eJxdUstu20AM/BVC50SykqZxemvRcw+Fb0FgrHYpi432AS7XDxj593Jlp017EVZ8zAyHfD43OKPHIFtyzRdo+nHdr8fePQ5u9bAyq/7+wYzj0+e71eP4Ce8emhtoPIpxRozWn5v62OZY2OLyn5A95Uwx5O216Pnc+Ohq+v6+X6/fXhSj8Fzpuil67LiEgNwdIr92JWThYqUwun9/BLNsP0a2FHYa69q2w6PxacZbF23uJhPcEOPrbZ9aDRybNyUcaUY5paqiMSnNZI2oyG4fXBsThqOfx8jeSL6N40gWtbNUY1qV5RJHizpV2Pm5fc9UL2YTdsWojDpng2HX1OmSRrah+AFZ43eVX/AolXszIXwz4ZVLEnuCn1hZ4asViCP0T49rYPRxjw5EK4e/lb+K2yGMHP2SYcxJPaaBZhLCDIoDzpwk6geM8xRIrVqGrNDWZMwtwHccJHK+AasO0uWpfikmsYNkeAE7kEwKskfOCBQElU4ySFy4dQNZEOGgYbAxWOSwCDay5K2eg9zAYSI7QWLcUyx5VlUpxQrmFsZc9Fb2lK+jXkG1LZbZQYgC5Bc9Zq69Tg2oS8NaTQxMu0klmXzVabgK10kvMi5oLWyi5p3Kz7Uv/9GrYy+Air6o+ejYXn0uwVbrLlZQ+H8b+aTw/uLAQKOevypzbT2J9yv7YfgCtqmrf3v5DbNIPGE=" - } - }, - { - "type": "CompositeElement", - "element_id": "5f89702a93c3df34a62905e5dff5c54d", - "text": "Debtors, creditors, and third parties with adverse interests to the trustee were concerned that the court, which previously appointed and supervised", - "metadata": { - "data_source": { - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/handbook-1p.docx", - "permissions_data": [ - { - "mode": 33188 - } - ] - }, - "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", - "languages": [ - "eng" - ], - "page_number": 2, - "orig_elements": 
"eJxdUstu20AM/BVC50SykqZxemvRcw+Fb0FgrHYpi432AS7XDxj593Jlp017EVZ8zAyHfD43OKPHIFtyzRdo+nHdr8fePQ5u9bAyq/7+wYzj0+e71eP4Ce8emhtoPIpxRozWn5v62OZY2OLyn5A95Uwx5O216Pnc+Ohq+v6+X6/fXhSj8Fzpuil67LiEgNwdIr92JWThYqUwun9/BLNsP0a2FHYa69q2w6PxacZbF23uJhPcEOPrbZ9aDRybNyUcaUY5paqiMSnNZI2oyG4fXBsThqOfx8jeSL6N40gWtbNUY1qV5RJHizpV2Pm5fc9UL2YTdsWojDpng2HX1OmSRrah+AFZ43eVX/AolXszIXwz4ZVLEnuCn1hZ4asViCP0T49rYPRxjw5EK4e/lb+K2yGMHP2SYcxJPaaBZhLCDIoDzpwk6geM8xRIrVqGrNDWZMwtwHccJHK+AasO0uWpfikmsYNkeAE7kEwKskfOCBQElU4ySFy4dQNZEOGgYbAxWOSwCDay5K2eg9zAYSI7QWLcUyx5VlUpxQrmFsZc9Fb2lK+jXkG1LZbZQYgC5Bc9Zq69Tg2oS8NaTQxMu0klmXzVabgK10kvMi5oLWyi5p3Kz7Uv/9GrYy+Air6o+ejYXn0uwVbrLlZQ+H8b+aTw/uLAQKOevypzbT2J9yv7YfgCtqmrf3v5DbNIPGE=", - "is_continuation": true - } - }, - { - "type": "CompositeElement", - "element_id": "c916e417ed924c556baed9616c3f81ae", - "text": "nted and supervised the trustee, would not impartially adjudicate their rights as adversaries of that trustee. To address these concerns, judicial and", - "metadata": { - "data_source": { - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/handbook-1p.docx", - "permissions_data": [ - { - "mode": 33188 - } - ] - }, - "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", - "languages": [ - "eng" - ], - "page_number": 2, - "orig_elements": 
"eJxdUstu20AM/BVC50SykqZxemvRcw+Fb0FgrHYpi432AS7XDxj593Jlp017EVZ8zAyHfD43OKPHIFtyzRdo+nHdr8fePQ5u9bAyq/7+wYzj0+e71eP4Ce8emhtoPIpxRozWn5v62OZY2OLyn5A95Uwx5O216Pnc+Ohq+v6+X6/fXhSj8Fzpuil67LiEgNwdIr92JWThYqUwun9/BLNsP0a2FHYa69q2w6PxacZbF23uJhPcEOPrbZ9aDRybNyUcaUY5paqiMSnNZI2oyG4fXBsThqOfx8jeSL6N40gWtbNUY1qV5RJHizpV2Pm5fc9UL2YTdsWojDpng2HX1OmSRrah+AFZ43eVX/AolXszIXwz4ZVLEnuCn1hZ4asViCP0T49rYPRxjw5EK4e/lb+K2yGMHP2SYcxJPaaBZhLCDIoDzpwk6geM8xRIrVqGrNDWZMwtwHccJHK+AasO0uWpfikmsYNkeAE7kEwKskfOCBQElU4ySFy4dQNZEOGgYbAxWOSwCDay5K2eg9zAYSI7QWLcUyx5VlUpxQrmFsZc9Fb2lK+jXkG1LZbZQYgC5Bc9Zq69Tg2oS8NaTQxMu0klmXzVabgK10kvMi5oLWyi5p3Kz7Uv/9GrYy+Air6o+ejYXn0uwVbrLlZQ+H8b+aTw/uLAQKOevypzbT2J9yv7YfgCtqmrf3v5DbNIPGE=", - "is_continuation": true - } - }, - { - "type": "CompositeElement", - "element_id": "45eb8c16f94112f46ab05c934abc1d84", - "text": "cerns, judicial and administrative functions within the bankruptcy system were bifurcated.", - "metadata": { - "data_source": { - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/handbook-1p.docx", - "permissions_data": [ - { - "mode": 33188 - } - ] - }, - "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", - "languages": [ - "eng" - ], - "page_number": 2, - "orig_elements": "eJxdUstu20AM/BVC50SykqZxemvRcw+Fb0FgrHYpi432AS7XDxj593Jlp017EVZ8zAyHfD43OKPHIFtyzRdo+nHdr8fePQ5u9bAyq/7+wYzj0+e71eP4Ce8emhtoPIpxRozWn5v62OZY2OLyn5A95Uwx5O216Pnc+Ohq+v6+X6/fXhSj8Fzpuil67LiEgNwdIr92JWThYqUwun9/BLNsP0a2FHYa69q2w6PxacZbF23uJhPcEOPrbZ9aDRybNyUcaUY5paqiMSnNZI2oyG4fXBsThqOfx8jeSL6N40gWtbNUY1qV5RJHizpV2Pm5fc9UL2YTdsWojDpng2HX1OmSRrah+AFZ43eVX/AolXszIXwz4ZVLEnuCn1hZ4asViCP0T49rYPRxjw5EK4e/lb+K2yGMHP2SYcxJPaaBZhLCDIoDzpwk6geM8xRIrVqGrNDWZMwtwHccJHK+AasO0uWpfikmsYNkeAE7kEwKskfOCBQElU4ySFy4dQNZEOGgYbAxWOSwCDay5K2eg9zAYSI7QWLcUyx5VlUpxQrmFsZc9Fb2lK+jXkG1LZbZQYgC5Bc9Zq69Tg2oS8NaTQxMu0klmXzVabgK10kvMi5oLWyi5p3Kz7Uv/9GrYy+Air6o+ejYXn0uwVbrLlZQ+H8b+aTw/uLAQKOevypzbT2J9yv7YfgCtqmrf3v5DbNIPGE=", - 
"is_continuation": true - } - }, - { - "type": "CompositeElement", - "element_id": "709927b67286cccaf8fb25d63667c277", - "text": "Many administrative functions formerly performed by the court were placed within the Department of Justice through the creation of the Program. Among", - "metadata": { - "data_source": { - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/handbook-1p.docx", - "permissions_data": [ - { - "mode": 33188 - } - ] - }, - "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", - "languages": [ - "eng" - ], - "page_number": 2, - "orig_elements": "eJx1ktuK2zAQhl9l8HVj57CbrntX6EUptBSaXm0Xo1hjW6w0EtIoB0LevSMnC22hd5qD9H//jJ4vFVp0SNwZXX2Aar9dq4fV47Zvt2p4v35cttu1bttNq5eot5uH6h1UDllpxUr6L1U5dMnn2OMcB4zOpGQ8pe7e9HypnNelvNmsnp6uL/JGjrbINZN32MRMhLE5+vjaZEocc885ov47YEzc/ZnpDI2Sa+q6wZNyweJC+z41kyK99/51sQq1JE7VVQQHY5HPoVBUKgRresUC2RxI1z4gnZwdfHSK08IPg+lRbuYymFqwdIi+R3FFo7P1W6XMwioasxKM4rNCGqviLkimo+z2GCW/LvqMJy7aXxWdQWlnyIgTYTggDJn6ApOgIGC0Z5AxzmcN+zPwhNDLiBmOGBGCVYIHR8OTobn4CYOKXJjAD/AlJxYDUok+j9PtesTZb6mX+Hv0Y1SuBvjoPI1z7r9USpyPJJLs58afZFiiH6xkJ7CLood4Y5vfCcEbuuHIKiBlcXMw6S7fTyowRlhtgG9XU90A7CaT4PN9dSBn+UVZVDJpaZ7fzTz5aPj8j4lfeb1ctQmQ1N7KiiAJWBa0GsqK3rb+TcWbs11ZxfXlNzaQEHo=" - } - }, - { - "type": "CompositeElement", - "element_id": "509676fb8d4f77b5f270629dee7a2664", - "text": "the Program. 
Among the administrative functions assigned to the United States Trustee were the appointment and supervision of chapter 13 trustees./", - "metadata": { - "data_source": { - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/handbook-1p.docx", - "permissions_data": [ - { - "mode": 33188 - } - ] - }, - "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", - "languages": [ - "eng" - ], - "page_number": 2, - "orig_elements": "eJx1ktuK2zAQhl9l8HVj57CbrntX6EUptBSaXm0Xo1hjW6w0EtIoB0LevSMnC22hd5qD9H//jJ4vFVp0SNwZXX2Aar9dq4fV47Zvt2p4v35cttu1bttNq5eot5uH6h1UDllpxUr6L1U5dMnn2OMcB4zOpGQ8pe7e9HypnNelvNmsnp6uL/JGjrbINZN32MRMhLE5+vjaZEocc885ov47YEzc/ZnpDI2Sa+q6wZNyweJC+z41kyK99/51sQq1JE7VVQQHY5HPoVBUKgRresUC2RxI1z4gnZwdfHSK08IPg+lRbuYymFqwdIi+R3FFo7P1W6XMwioasxKM4rNCGqviLkimo+z2GCW/LvqMJy7aXxWdQWlnyIgTYTggDJn6ApOgIGC0Z5AxzmcN+zPwhNDLiBmOGBGCVYIHR8OTobn4CYOKXJjAD/AlJxYDUok+j9PtesTZb6mX+Hv0Y1SuBvjoPI1z7r9USpyPJJLs58afZFiiH6xkJ7CLood4Y5vfCcEbuuHIKiBlcXMw6S7fTyowRlhtgG9XU90A7CaT4PN9dSBn+UVZVDJpaZ7fzTz5aPj8j4lfeb1ctQmQ1N7KiiAJWBa0GsqK3rb+TcWbs11ZxfXlNzaQEHo=", - "is_continuation": true - } - }, - { - "type": "CompositeElement", - "element_id": "7ced6d1ee6cc9478adfd8e2a613be42a", - "text": "apter 13 trustees./ This Handbook is issued under the authority of the Program’s enabling statutes. 
", - "metadata": { - "data_source": { - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/handbook-1p.docx", - "permissions_data": [ - { - "mode": 33188 - } - ] - }, - "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", - "languages": [ - "eng" - ], - "page_number": 2, - "orig_elements": "eJx1ktuK2zAQhl9l8HVj57CbrntX6EUptBSaXm0Xo1hjW6w0EtIoB0LevSMnC22hd5qD9H//jJ4vFVp0SNwZXX2Aar9dq4fV47Zvt2p4v35cttu1bttNq5eot5uH6h1UDllpxUr6L1U5dMnn2OMcB4zOpGQ8pe7e9HypnNelvNmsnp6uL/JGjrbINZN32MRMhLE5+vjaZEocc885ov47YEzc/ZnpDI2Sa+q6wZNyweJC+z41kyK99/51sQq1JE7VVQQHY5HPoVBUKgRresUC2RxI1z4gnZwdfHSK08IPg+lRbuYymFqwdIi+R3FFo7P1W6XMwioasxKM4rNCGqviLkimo+z2GCW/LvqMJy7aXxWdQWlnyIgTYTggDJn6ApOgIGC0Z5AxzmcN+zPwhNDLiBmOGBGCVYIHR8OTobn4CYOKXJjAD/AlJxYDUok+j9PtesTZb6mX+Hv0Y1SuBvjoPI1z7r9USpyPJJLs58afZFiiH6xkJ7CLood4Y5vfCcEbuuHIKiBlcXMw6S7fTyowRlhtgG9XU90A7CaT4PN9dSBn+UVZVDJpaZ7fzTz5aPj8j4lfeb1ctQmQ1N7KiiAJWBa0GsqK3rb+TcWbs11ZxfXlNzaQEHo=", - "is_continuation": true - } - }, - { - "type": "CompositeElement", - "element_id": "2c82d3fa4252275d5309a640eb25cd68", - "text": "C.\tSTATUTORY DUTIES OF A STANDING TRUSTEE\t", - "metadata": { - "data_source": { - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/handbook-1p.docx", - "permissions_data": [ - { - "mode": 33188 - } - ] - }, - "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", - "languages": [ - "eng" - ], - "page_number": 2, - "orig_elements": "eJxVkMtuwjAQRX8l8rrYDRRoukOFVmxAgrCoAEVOPAkWfsmPNhXi32ujIrU7z5kZ33tnf0EgQILyFWfoJUNPRZEP82I8mtK6qOFxDDCmUEBOGZvQ6QQ9ZEiCp4x6GucvKD0qp4Nt4FYbsJI7x7Vy1e/Q/oKkZqk9GuXPz9dj/CNYkeTISUsgNigFlnxpeyZBOW9D44MF9r/w4Hz1l1RcdZERjAn0VBoBA6YbR05UsVrr8yA3OIIeXaNgywX4b5NcIGqM4A310ST5VAxrA6qXotVWUu8Gum15A3EzpMPgaIsZqxuIqVQnBb530i0EVV2g0UbKiUB1KKUzkVQqyBps5MOk76H3SfsVH/y2nJW7cr35yOa7crnYZuu3bJZFupovV+9Zudlty8XicBO4ey65F4Cuxx84MZOC" - } - }, - { - "type": 
"CompositeElement", - "element_id": "a819e32a65d1f545cb404fe3f6273357", - "text": "The standing trustee has a fiduciary responsibility to the bankruptcy estate. The standing trustee is more than a mere disbursing agent. The", - "metadata": { - "data_source": { - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/handbook-1p.docx", - "permissions_data": [ - { - "mode": 33188 - } - ] - }, - "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", - "languages": [ - "eng" - ], - "page_number": 2, - "orig_elements": "eJx1Ustu2zAQ/JWFTi1gU37FVnsNUKCXHhrnlAbCilzZhPkQ+HBtGP73LtUaSAL3JGp3Z3ZmyJdLRYYsudRqVX2FatF82ahV33Wr+VJuZvjQbJqmn80a3KzXsllUE6gsJVSYkOcvVTm00ecgafwfKFgdo/Yutv+GXi6V9aq0l8t501xfmSMHU9bVe2+pDtk5CvVvHw51djGFLFMOpN7/JIqpfVtptdtxrRaiphPawdBUeRnrPTrVeX+YzgfBhVN15YW9NpTOQ1FR4TAYLTGxyProlPADuZM1vQ8WU5z6vteSGJlLMIJlqSF4SezK7awRt07JwqDbZWQZxWdFblcVdwNXWpdtR4Hri7I/0SmV3ds9QUyskLmArcREBHuMgNBrlaXGcIZAceAAdaeNTmdIHhLDOnSHkIckz8C2MZEAuEunI1gfiEHomNcSn5WOXQ7FAbA4tvUfrOUPdAR8j9E7NOYM2h29OZLiw6jjNsqxhTFE5vrej617UnxgPsn3HCE77AwVPwwtcY8glZPmJiPfGy9F37OBj6yT+7tG6dpaUprDYeGojjrSOPzsdGIHTyW2CNu/CNa9aOBZPIlHAb/ybIYbeGjWn7rPk9J4FN/EzzcNseIOS+w5TyfL5ntDSx4S5WncXtsPDCWmI23LE7i+/gGINzKx" - } - }, - { - "type": "CompositeElement", - "element_id": "9e98089003e3b42ed7f1c263335dee3c", - "text": "bursing agent. The standing trustee must be personally involved in the trustee operation. 
If the standing trustee is or becomes unable to perform", - "metadata": { - "data_source": { - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/handbook-1p.docx", - "permissions_data": [ - { - "mode": 33188 - } - ] - }, - "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", - "languages": [ - "eng" - ], - "page_number": 2, - "orig_elements": "eJx1Ustu2zAQ/JWFTi1gU37FVnsNUKCXHhrnlAbCilzZhPkQ+HBtGP73LtUaSAL3JGp3Z3ZmyJdLRYYsudRqVX2FatF82ahV33Wr+VJuZvjQbJqmn80a3KzXsllUE6gsJVSYkOcvVTm00ecgafwfKFgdo/Yutv+GXi6V9aq0l8t501xfmSMHU9bVe2+pDtk5CvVvHw51djGFLFMOpN7/JIqpfVtptdtxrRaiphPawdBUeRnrPTrVeX+YzgfBhVN15YW9NpTOQ1FR4TAYLTGxyProlPADuZM1vQ8WU5z6vteSGJlLMIJlqSF4SezK7awRt07JwqDbZWQZxWdFblcVdwNXWpdtR4Hri7I/0SmV3ds9QUyskLmArcREBHuMgNBrlaXGcIZAceAAdaeNTmdIHhLDOnSHkIckz8C2MZEAuEunI1gfiEHomNcSn5WOXQ7FAbA4tvUfrOUPdAR8j9E7NOYM2h29OZLiw6jjNsqxhTFE5vrej617UnxgPsn3HCE77AwVPwwtcY8glZPmJiPfGy9F37OBj6yT+7tG6dpaUprDYeGojjrSOPzsdGIHTyW2CNu/CNa9aOBZPIlHAb/ybIYbeGjWn7rPk9J4FN/EzzcNseIOS+w5TyfL5ntDSx4S5WncXtsPDCWmI23LE7i+/gGINzKx", - "is_continuation": true - } - }, - { - "type": "CompositeElement", - "element_id": "d476b15e5336342b1da22d100849b23c", - "text": "s unable to perform the duties and responsibilities of a standing trustee, the standing trustee must immediately advise the United States Trustee. 
28", - "metadata": { - "data_source": { - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/handbook-1p.docx", - "permissions_data": [ - { - "mode": 33188 - } - ] - }, - "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", - "languages": [ - "eng" - ], - "page_number": 2, - "orig_elements": "eJx1Ustu2zAQ/JWFTi1gU37FVnsNUKCXHhrnlAbCilzZhPkQ+HBtGP73LtUaSAL3JGp3Z3ZmyJdLRYYsudRqVX2FatF82ahV33Wr+VJuZvjQbJqmn80a3KzXsllUE6gsJVSYkOcvVTm00ecgafwfKFgdo/Yutv+GXi6V9aq0l8t501xfmSMHU9bVe2+pDtk5CvVvHw51djGFLFMOpN7/JIqpfVtptdtxrRaiphPawdBUeRnrPTrVeX+YzgfBhVN15YW9NpTOQ1FR4TAYLTGxyProlPADuZM1vQ8WU5z6vteSGJlLMIJlqSF4SezK7awRt07JwqDbZWQZxWdFblcVdwNXWpdtR4Hri7I/0SmV3ds9QUyskLmArcREBHuMgNBrlaXGcIZAceAAdaeNTmdIHhLDOnSHkIckz8C2MZEAuEunI1gfiEHomNcSn5WOXQ7FAbA4tvUfrOUPdAR8j9E7NOYM2h29OZLiw6jjNsqxhTFE5vrej617UnxgPsn3HCE77AwVPwwtcY8glZPmJiPfGy9F37OBj6yT+7tG6dpaUprDYeGojjrSOPzsdGIHTyW2CNu/CNa9aOBZPIlHAb/ybIYbeGjWn7rPk9J4FN/EzzcNseIOS+w5TyfL5ntDSx4S5WncXtsPDCWmI23LE7i+/gGINzKx", - "is_continuation": true - } - }, - { - "type": "CompositeElement", - "element_id": "8f8c9c0919f7502bd2fabad0b12ad664", - "text": "States Trustee. 28 U.S.C. § 586(b), 28 C.F.R. § 58.4(b) referencing 28 C.F.R. 
§ 58.3(b).", - "metadata": { - "data_source": { - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/handbook-1p.docx", - "permissions_data": [ - { - "mode": 33188 - } - ] - }, - "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", - "languages": [ - "eng" - ], - "page_number": 2, - "orig_elements": "eJx1Ustu2zAQ/JWFTi1gU37FVnsNUKCXHhrnlAbCilzZhPkQ+HBtGP73LtUaSAL3JGp3Z3ZmyJdLRYYsudRqVX2FatF82ahV33Wr+VJuZvjQbJqmn80a3KzXsllUE6gsJVSYkOcvVTm00ecgafwfKFgdo/Yutv+GXi6V9aq0l8t501xfmSMHU9bVe2+pDtk5CvVvHw51djGFLFMOpN7/JIqpfVtptdtxrRaiphPawdBUeRnrPTrVeX+YzgfBhVN15YW9NpTOQ1FR4TAYLTGxyProlPADuZM1vQ8WU5z6vteSGJlLMIJlqSF4SezK7awRt07JwqDbZWQZxWdFblcVdwNXWpdtR4Hri7I/0SmV3ds9QUyskLmArcREBHuMgNBrlaXGcIZAceAAdaeNTmdIHhLDOnSHkIckz8C2MZEAuEunI1gfiEHomNcSn5WOXQ7FAbA4tvUfrOUPdAR8j9E7NOYM2h29OZLiw6jjNsqxhTFE5vrej617UnxgPsn3HCE77AwVPwwtcY8glZPmJiPfGy9F37OBj6yT+7tG6dpaUprDYeGojjrSOPzsdGIHTyW2CNu/CNa9aOBZPIlHAb/ybIYbeGjWn7rPk9J4FN/EzzcNseIOS+w5TyfL5ntDSx4S5WncXtsPDCWmI23LE7i+/gGINzKx", - "is_continuation": true - } - }, - { - "type": "CompositeElement", - "element_id": "eff9d6f3a0cdb968b7715e2e417e12ea", - "text": "Although this Handbook is not intended to be a complete statutory reference, the standing trustee’s primary statutory duties are set forth in 11", - "metadata": { - "data_source": { - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/handbook-1p.docx", - "permissions_data": [ - { - "mode": 33188 - } - ] - }, - "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", - "languages": [ - "eng" - ], - "page_number": 2, - "orig_elements": 
"eJxtUclu2zAQ/ZWBzrY2B7WcW9FLTrk0PSWBQIlDiwg3kMPYgeF/79C12xTIbfa3zPOpQoMWHY1aVvdQ7b6p/q6f2gnFNPS4UbJV290Gt8M0DDvVVyuoLJKQggTPn6oSjMnnOOMlDxitTkl7l8br0POpsl6W9mbTDcP5lW/kaApcs3iLTczOYWwOPr412SWKeaYcUf6fECYaP1dG7fZca+q6waOwweBa+jk1i3By8v5t3YWaC8fqzIBKG6SPUFhUIgSjZ0FMsnl3svYB3dEa5aMVlNZeKT0jb+ZiTM20ZIh+Rlbl9tbUt07xwgi3z4JpFJ0Vun1V1AWujC7bCSPX+4JPeKSC/d3Q4vN+AVp0gocrVeDYeQLtCJ1ECeRhQhAw+6KLEBIJyuTjB0RUGNHNuOIbl4aTTAzYl0SIL7lvu12CELUVPP5vUWbSmEBEXkIClksLI0LXwa/6Z/2jhpfctmIL3abtV3BY9Fz6s4/BR8H2w/QJHRK/Dry6kLie5mxeRCCMsL3xSQyUnfwSaNve1QBPC6a/JxjPZMnapkwXqsUWo62miyt/NCtvjD+w6Pvyg9tbH0Vkmvodn4rX59ffvvr+kw==" - } - }, - { - "type": "CompositeElement", - "element_id": "9864d90bf9febdd104e7eac4c56689ba", - "text": "are set forth in 11 U.S.C. § 1302, which incorporates by reference some of the duties of chapter 7 trustees found in 11 U.S.C. § 704. These duties", - "metadata": { - "data_source": { - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/handbook-1p.docx", - "permissions_data": [ - { - "mode": 33188 - } - ] - }, - "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", - "languages": [ - "eng" - ], - "page_number": 2, - "orig_elements": "eJxtUclu2zAQ/ZWBzrY2B7WcW9FLTrk0PSWBQIlDiwg3kMPYgeF/79C12xTIbfa3zPOpQoMWHY1aVvdQ7b6p/q6f2gnFNPS4UbJV290Gt8M0DDvVVyuoLJKQggTPn6oSjMnnOOMlDxitTkl7l8br0POpsl6W9mbTDcP5lW/kaApcs3iLTczOYWwOPr412SWKeaYcUf6fECYaP1dG7fZca+q6waOwweBa+jk1i3By8v5t3YWaC8fqzIBKG6SPUFhUIgSjZ0FMsnl3svYB3dEa5aMVlNZeKT0jb+ZiTM20ZIh+Rlbl9tbUt07xwgi3z4JpFJ0Vun1V1AWujC7bCSPX+4JPeKSC/d3Q4vN+AVp0gocrVeDYeQLtCJ1ECeRhQhAw+6KLEBIJyuTjB0RUGNHNuOIbl4aTTAzYl0SIL7lvu12CELUVPP5vUWbSmEBEXkIClksLI0LXwa/6Z/2jhpfctmIL3abtV3BY9Fz6s4/BR8H2w/QJHRK/Dry6kLie5mxeRCCMsL3xSQyUnfwSaNve1QBPC6a/JxjPZMnapkwXqsUWo62miyt/NCtvjD+w6Pvyg9tbH0Vkmvodn4rX59ffvvr+kw==", - "is_continuation": true - } - }, - { - "type": "CompositeElement", - "element_id": "a91f963bcd1c092bffb844453aafa499", - "text": "704. 
These duties include, but are not limited to, the following:", - "metadata": { - "data_source": { - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/handbook-1p.docx", - "permissions_data": [ - { - "mode": 33188 - } - ] - }, - "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", - "languages": [ - "eng" - ], - "page_number": 2, - "orig_elements": "eJxtUclu2zAQ/ZWBzrY2B7WcW9FLTrk0PSWBQIlDiwg3kMPYgeF/79C12xTIbfa3zPOpQoMWHY1aVvdQ7b6p/q6f2gnFNPS4UbJV290Gt8M0DDvVVyuoLJKQggTPn6oSjMnnOOMlDxitTkl7l8br0POpsl6W9mbTDcP5lW/kaApcs3iLTczOYWwOPr412SWKeaYcUf6fECYaP1dG7fZca+q6waOwweBa+jk1i3By8v5t3YWaC8fqzIBKG6SPUFhUIgSjZ0FMsnl3svYB3dEa5aMVlNZeKT0jb+ZiTM20ZIh+Rlbl9tbUt07xwgi3z4JpFJ0Vun1V1AWujC7bCSPX+4JPeKSC/d3Q4vN+AVp0gocrVeDYeQLtCJ1ECeRhQhAw+6KLEBIJyuTjB0RUGNHNuOIbl4aTTAzYl0SIL7lvu12CELUVPP5vUWbSmEBEXkIClksLI0LXwa/6Z/2jhpfctmIL3abtV3BY9Fz6s4/BR8H2w/QJHRK/Dry6kLie5mxeRCCMsL3xSQyUnfwSaNve1QBPC6a/JxjPZMnapkwXqsUWo62miyt/NCtvjD+w6Pvyg9tbH0Vkmvodn4rX59ffvvr+kw==", - "is_continuation": true - } - }, - { - "type": "CompositeElement", - "element_id": "6856f7d3b3c9ceab0b35fb2951d30be0", - "text": "Copyright", - "metadata": { - "data_source": { - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/handbook-1p.docx", - "permissions_data": [ - { - "mode": 33188 - } - ] - }, - "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", - "languages": [ - "eng" - ], - "orig_elements": "eJxVUNFuwjAM/BWU59FQGKLwOmk/gVDlJm4bkcSRk2xFiH9fgoa0PSU+n+/OPt8FWnToU2+0OK3Euz52mz0cD6obFCp12LbDvoWuHXG7GXZavK2EwwQaEhT+XdRPHymzwmcdkJ2J0ZCP/S/pfBeOdG3vdm3XPS5FI7OtdnImh5Kz98jym/gqs4+Js0qZUf8vEsbU/0V646eCyaaRuIALFteaVJQzeD0QXddtaAqwiEcxHI3FdAs1hYAQrFGQSkj55XVDAf3i7EjsIMU1jaNRWCZzPUxTYunApLBs5Sdnm1en3mJG0Mj9SJTK8zIIbBzwrRIs+ClDyVkPIdBP4lLjJFxSZX5QuLGZ5qfYa/zzqSYelx9OCJEa" - } - } -] \ No newline at end of file diff --git 
a/test_unstructured_ingest/expected-structured-output/local-single-file-basic-chunking/handbook-1p.json b/test_unstructured_ingest/expected-structured-output/local-single-file-basic-chunking/handbook-1p.json new file mode 100644 index 0000000000..41ec2987f8 --- /dev/null +++ b/test_unstructured_ingest/expected-structured-output/local-single-file-basic-chunking/handbook-1p.json @@ -0,0 +1,242 @@ +[ + { + "type": "CompositeElement", + "element_id": "36385872440a208d3521a8a885d5f873", + "text": "US Trustee Handbook\n\nCHAPTER 1\n\nINTRODUCTION\n\nCHAPTER 1 \u2013 INTRODUCTION\n\nA.\tPURPOSE", + "metadata": { + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/handbook-1p.docx" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + }, + "emphasized_text_contents": [ + "CHAPTER 1", + "INTRODUCTION" + ], + "emphasized_text_tags": [ + "b", + "b" + ], + "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + "languages": [ + "eng" + ], + "page_number": 1, + "orig_elements": "eJztlU1v2jAYx79K5PMgifNm71Z1leilIAqnFkWO/RiiJnbkOBsd4rvPhtK1E4dt0qQx7RQ/74//P1l52CFooAVly1qgjwFKODBKuIRIZAXJc8oqwDxNMkllFgmGPgSoBcsEs8zl75A/lL0eDIeD3YFp676vterLl6SHHWq18OEkiQnZr1wPA1wbUTaaM6vNsZLZjV8h3OgWQjMoBSb8os1TOKjemoHbwYB4b8CWtV0DI6F5H26YEpXWT6O4GzvHFu33bpKsG7DPnR+PWNc1tZvotgs/KzHWHaht20htWmb7kZay5uBKB6/I2M0WndEc3HXUum3Gp4gXYQNMgCml1tZ9TgM6U7fMPPuEhqn1wNbQewUQqDVa+XUsbK3PXN4HCzP0FiCYvOztq06NJof2yFX8SEgKSgkTaSogo1lW0IRgUeTuUFQVJvzyCEHbbVhffwVRenVKrpV19z0Kdz25mi1u5kGMVmdSLVsf06pD+A+xPoPSeTvnKdXQVuDVid/C/b70G6SL2jZwjmhFRERFlnIBmJBIFEwkNM8hYhHnVVX9Y0Rv7xbz6afl9eJ2endJUN/t/VNckyzPCEtlgYXDWUkc5RyqnCcYA5X48rj+LShe31fwOOAoToJfZsNzwAWN3W8uTomgKc6KIk5S8GYS0/Q/m99lczV+tLPlfDa9vzlDYvUNQ4PhWg==" + } + }, + { + "type": "CompositeElement", + "element_id": "4b484865316731b8db65bd89fae5107a", + "text": "The United States Trustee appoints and supervises standing trustees and monitors and supervises cases under chapter 13 
of title 11 of the United States Code. 28 U.S.C. \u00a7 586(b). The Handbook, issued as part of our duties under 28 U.S.C. \u00a7 586, establishes or clarifies the position of the United States Trustee Program (Program) on the duties owed by a standing trustee to the debtors, creditors, other parties in interest, and the United States Trustee. The Handbook does not present a full and", + "metadata": { + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/handbook-1p.docx" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + }, + "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + "languages": [ + "eng" + ], + "page_number": 1, + "orig_elements": "eJx1VNtu2zAM/RXCTymQOnHTdtn2tgLFnoail6etCGSJjoXKkiFRuaDov490krVdWiCIbfF2eHio388FOuzQ08Ka4hsUyjSzy+qinmE9PTPTaXN2oefn83M01Vx/xaYYQ9EhKaNIsf9zIS+LFHLUOHz3GDubkg0+LfZOv5+LLhgxz2bVfP7yyDki6hDNwgWtKMRdpKJWIEza0OEkZu8xTtYhPk2yTxSzphzRvP/Ajep6h6cm6DRplTd1CE+nVV/ywaZ4eeFKjXVI2x6H7vreWa7I6CYrb8rQo990rgmxU5ROQ9NYjRyahZGSa5s+Bo3cjl92rjxYhASn/DKrJSZpsEC/LKStnk8WPnc1Sk+V1CfckNS+bxEevCU0cEeKMMF9zIkQgVEF6ykB44eUmcGVTWxPxAdcGWjnuHPoAicJ8chbK/nP3mAE3aqe+FnNIDRAlhxCVQ3vRyiueDQlwNkcHsq78qqEP3k6VV/gYn45qk/YIsB/7qkdA882c7BK0KtIkpJnDyaT/Vf9o1RjQG6ndja17BcYolPRNhIkkPqQrEzlY4gHom5iWEbVwWj/cgIcIe778mHNMfUW1BF1QGHniLVwNwbN6rG718CGOHQjOaznH3PHcMcDxZ/C+Y8aMIFtPhD0HMwqYRhNdm5IooPIlFCA0bBvh06dWn8HS5DakJ0Z4muEnHYccye5TjzAzLEsU3C4VA6kgIq6HXIrr9w22bTHc9R6xw/J2ajOOqsirC21nMLhSnlBG1hAsq8HSD+Uf4q5J70dxDGGa+Sxctnb7HDweuNxIwtieBlhNJhPhLUtyGI7qF/9ohh3jIpSpW8GzKJ8L5XZWTX+REB8elVel7dvTsvLkToZzUSld8fLwpjQa5YnL6URBTTBubBmyEoTLzrc254HbngaokQjs6fWpteR1pmEEYYryWQ2mnniCFYPdOwll9e2lPvgcMX8UjHyBbPCe9n7l8e/SDHcqA==" + } + }, + { + "type": "CompositeElement", + "element_id": "3730ef4e66125803e3f525f35cc85a4d", + "text": "complete statement of the law; it should not be used as a substitute for legal research and analysis. 
The standing trustee must be familiar with relevant provisions of the Bankruptcy Code, Federal Rules of Bankruptcy Procedure (Rules), any local bankruptcy rules, and case law. 11 U.S.C. \u00a7 321, 28 U.S.C. \u00a7 586, 28 C.F.R. \u00a7 58.6(a)(3). Standing trustees are encouraged to follow Practice Tips identified in this Handbook but these are not considered mandatory.", + "metadata": { + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/handbook-1p.docx" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + }, + "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + "languages": [ + "eng" + ], + "page_number": 1, + "orig_elements": "eJx1VNtu2zAM/RXCTymQOnHTdtn2tgLFnoail6etCGSJjoXKkiFRuaDov490krVdWiCIbfF2eHio388FOuzQ08Ka4hsUyjSzy+qinmE9PTPTaXN2oefn83M01Vx/xaYYQ9EhKaNIsf9zIS+LFHLUOHz3GDubkg0+LfZOv5+LLhgxz2bVfP7yyDki6hDNwgWtKMRdpKJWIEza0OEkZu8xTtYhPk2yTxSzphzRvP/Ajep6h6cm6DRplTd1CE+nVV/ywaZ4eeFKjXVI2x6H7vreWa7I6CYrb8rQo990rgmxU5ROQ9NYjRyahZGSa5s+Bo3cjl92rjxYhASn/DKrJSZpsEC/LKStnk8WPnc1Sk+V1CfckNS+bxEevCU0cEeKMMF9zIkQgVEF6ykB44eUmcGVTWxPxAdcGWjnuHPoAicJ8chbK/nP3mAE3aqe+FnNIDRAlhxCVQ3vRyiueDQlwNkcHsq78qqEP3k6VV/gYn45qk/YIsB/7qkdA882c7BK0KtIkpJnDyaT/Vf9o1RjQG6ndja17BcYolPRNhIkkPqQrEzlY4gHom5iWEbVwWj/cgIcIe778mHNMfUW1BF1QGHniLVwNwbN6rG718CGOHQjOaznH3PHcMcDxZ/C+Y8aMIFtPhD0HMwqYRhNdm5IooPIlFCA0bBvh06dWn8HS5DakJ0Z4muEnHYccye5TjzAzLEsU3C4VA6kgIq6HXIrr9w22bTHc9R6xw/J2ajOOqsirC21nMLhSnlBG1hAsq8HSD+Uf4q5J70dxDGGa+Sxctnb7HDweuNxIwtieBlhNJhPhLUtyGI7qF/9ohh3jIpSpW8GzKJ8L5XZWTX+REB8elVel7dvTsvLkToZzUSld8fLwpjQa5YnL6URBTTBubBmyEoTLzrc254HbngaokQjs6fWpteR1pmEEYYryWQ2mnniCFYPdOwll9e2lPvgcMX8UjHyBbPCe9n7l8e/SDHcqA==", + "is_continuation": true + } + }, + { + "type": "CompositeElement", + "element_id": "9eeea30e62f966adc8623756fde370c7", + "text": "Nothing in this Handbook should be construed to excuse the standing trustee from complying with all duties imposed by the Bankruptcy Code and 
Rules, local rules, and orders of the court. The standing trustee should notify the United States Trustee whenever the provision of the Handbook conflicts with the local rules or orders of the court. The standing trustee is accountable for all duties set forth in this Handbook, but need not personally perform any duty unless otherwise indicated. All", + "metadata": { + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/handbook-1p.docx" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + }, + "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + "languages": [ + "eng" + ], + "page_number": 1, + "orig_elements": "eJyVUk1v2zAM/SuEz4kTN82S7Lb1slMPa3vqikCW6FioLHkSlcQo8t9HOmnRFcWA3SR+vffI9/hSoMMOPW2tKb5CsVrUmy96szArtVk3G1NfXS+XV2p53SyWq3o9LyZQdEjKKFJc/1LIY5tCjhrHf4+xsynZ4NP2UvT4UnTBSHqxqNbr0xPPiKhDNFsXtKIQz52KWqEwa0OHs5i9xzg7hPg8yz5RzJpyRPP3B4+q6x1OTdBp1ipv6hCep1VfcuBYnE6M1FiHNPQCX6i+d5YRmd1s700ZevTHzjUhdorSNDSN1citWTZSMrbpY9DIcvyuc+VrRpbglN9ltcMkAgv0u0Jk9RzZ+tzVKJoqwSc8kmDfBmp5DFgP/Ejw48IWUhuyM1Aj6DBqQwMUAI86J+RahERcK72cTIQITQwdV7P0QcIHSy0o58BkspjAdn1IPKUexvbvyj/H3JMe4IbvADwMfmaHaQKyfwfx/JE4S8aYIDRjp+a7Uglw/xmJC28fyDZnoAdviWHvSBGzuL/UHVr0uMc4lvA+91bc8QrxtgYW3/BxKJ3lSO4dOyb2P9x4v0pzhSdVO94Xd7/bT0KSEKN8PMYE6kzgEUddwGZOwXPnIE+xCS9pkDEDZM+0mA1ziQfLl7JMgb2Fhkl9YzAmRZnNPUDEBiN6Laf5eP0xJ/f+5FITqCp4KO/KmxJ+5flcraCaV8DsE/4uJ/+iIBZ9df2tipE9v8d7seLp6Q+7i2fd" + } + }, + { + "type": "CompositeElement", + "element_id": "6f3826b74e8ceab42c39004920df9d73", + "text": "statutory references in this Handbook refer to the Bankruptcy Code, 11 U.S.C. 
\u00a7 101 et seq., unless otherwise indicated.", + "metadata": { + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/handbook-1p.docx" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + }, + "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + "languages": [ + "eng" + ], + "page_number": 1, + "orig_elements": "eJyVUk1v2zAM/SuEz4kTN82S7Lb1slMPa3vqikCW6FioLHkSlcQo8t9HOmnRFcWA3SR+vffI9/hSoMMOPW2tKb5CsVrUmy96szArtVk3G1NfXS+XV2p53SyWq3o9LyZQdEjKKFJc/1LIY5tCjhrHf4+xsynZ4NP2UvT4UnTBSHqxqNbr0xPPiKhDNFsXtKIQz52KWqEwa0OHs5i9xzg7hPg8yz5RzJpyRPP3B4+q6x1OTdBp1ipv6hCep1VfcuBYnE6M1FiHNPQCX6i+d5YRmd1s700ZevTHzjUhdorSNDSN1citWTZSMrbpY9DIcvyuc+VrRpbglN9ltcMkAgv0u0Jk9RzZ+tzVKJoqwSc8kmDfBmp5DFgP/Ejw48IWUhuyM1Aj6DBqQwMUAI86J+RahERcK72cTIQITQwdV7P0QcIHSy0o58BkspjAdn1IPKUexvbvyj/H3JMe4IbvADwMfmaHaQKyfwfx/JE4S8aYIDRjp+a7Uglw/xmJC28fyDZnoAdviWHvSBGzuL/UHVr0uMc4lvA+91bc8QrxtgYW3/BxKJ3lSO4dOyb2P9x4v0pzhSdVO94Xd7/bT0KSEKN8PMYE6kzgEUddwGZOwXPnIE+xCS9pkDEDZM+0mA1ziQfLl7JMgb2Fhkl9YzAmRZnNPUDEBiN6Laf5eP0xJ/f+5FITqCp4KO/KmxJ+5flcraCaV8DsE/4uJ/+iIBZ9df2tipE9v8d7seLp6Q+7i2fd", + "is_continuation": true + } + }, + { + "type": "CompositeElement", + "element_id": "2eda2c36d153971dee6732c17841847a", + "text": "This Handbook does not create additional rights against the standing trustee or United States Trustee in favor of other parties.\n\nB.\tROLE OF THE UNITED STATES TRUSTEE", + "metadata": { + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/handbook-1p.docx" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + }, + "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + "languages": [ + "eng" + ], + "page_number": 1, + "orig_elements": 
"eJztU0uP0zAQ/isjn2neaRNurAhaJLSLtulpWVWOPU6sJnZkO6Vo1f+OvXSlguDAfW+xZ758jxk/PhMccULl9pKT90AQN2my7mjCsyLZ1EmRiERgXdapoDntCvIOyISOcuqo738m4WNv9WIYvpxnNJO0Vmpl95emx2cyaR7KeZ5W1fnJ/8Mg04bvR82o0+YXkrohSIgHPWFsFqXQxN+1OcSLss4szC0G+e8HPNFpHnHFNbPxQBXvtD6s0jnyFydyPnsmIUd0P+ZAT+g8j9IzenXxUfFIz6hO0yi0maizKy2EZOihS0gk8tx8Npqht6P6aYxeKyGEkap+oT3aYJCg6kmwNfubvVqmDoOnNPA7PLnA3Q7Swu1FInCNFpR2wAxSh0A5l0EWHcHIfnAWaE+ltwpuQLDO47wG8M6tQwRtYKekQw5b5+EW2ktBKhD06MtagPZQAzM1TqKNgujXHO6oMT6FI7ZBnFf55xrUdcLXVVJuumxdCV6IIis6XtCSdWXJ6/xtDf69BtdDvw79q2+78dM+/C3wImfc70Dd5VnH67RgWc5F3VV1VZUsKd8C/593l12P4Cb65h7uvzRw/wna2wZ2d5/b5iNs2w9ts4X2Ybdtm+Z6Tq10I5Lz009QrsIj" + } + }, + { + "type": "CompositeElement", + "element_id": "d915c544c4c9e87377963581aa113103", + "text": "The Bankruptcy Reform Act of 1978 removed the bankruptcy judge from the responsibilities for daytoday administration of cases. Debtors, creditors, and third parties with adverse interests to the trustee were concerned that the court, which previously appointed and supervised the trustee, would not impartially adjudicate their rights as adversaries of that trustee. 
To address these concerns, judicial and administrative functions within the bankruptcy system were bifurcated.", + "metadata": { + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/handbook-1p.docx" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + }, + "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + "languages": [ + "eng" + ], + "page_number": 2, + "orig_elements": "eJxdUkuP2jAQ/iujnCEQKIXtrVXPPVTcVivk2BPiEnussc1DiP/eccL2sZfI8cx8L8/rvcIBHfp0sKb6AlXT7Zpd15hta5abpVo2643qupfPq+W2+4SrTTWDymFSRiUl/feqHA6RMmsc/wOyszFa8vHwbHq9V45MKa/XzW73eBMMRk1sDgNplYinSZX6ImHRk8MFZ++RFxfi0yL7mDjrlBnN/z94VS4MODek46JX3rREp3kTarm4Vo+HMHV2wHQLhb5SIQxWGEXd4uxNTQH91Q0dsVMpzqnrrEYZzSWRWrhNYNIodvzRDfV7pYQwKH/M6oixGKzQH6tiK8jNwWfXYvG0KvwJr6lw73uEb8qfOIekb/ATCyt81Qmog+ZluwNGR2c0kKSz/dv5K5sjQsfkxgpjDBKube1gk8UIggNG3RLJB5Rx1ltJaDRZoLWKGGuA79hK0nEGWoKz01ECE0zLBoLiEexiUy8gZ+SIYH1CoUsREo3cEnxMiHCRa9DkNbIfBas01rXsQZrBpbe6h8B4tpTjIKpCoAJmRsaYZUnONj6tPkFljPJgwFMC60Y9aiizRgIoj4al2zKwPfYiScWnTsVFuDidZExoNexJ6kbkxzIX/+gV2yOgoI9q/k3sLDlnr0t0UxTWf3yNeBN4NyXQ2k72XpSZuqzE+5b9UDyB7cvTP95+AzyBOSk=" + } + }, + { + "type": "CompositeElement", + "element_id": "36e1997b0c7d7f4bb271f757bf0165c0", + "text": "Many administrative functions formerly performed by the court were placed within the Department of Justice through the creation of the Program. Among the administrative functions assigned to the United States Trustee were the appointment and supervision of chapter 13 trustees./ This Handbook is issued under the authority of the Program\u2019s enabling statutes. 
\n\nC.\tSTATUTORY DUTIES OF A STANDING TRUSTEE\t", + "metadata": { + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/handbook-1p.docx" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + }, + "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + "languages": [ + "eng" + ], + "page_number": 2, + "orig_elements": "eJztUk1P3DAQ/SujnEuy2WUD6Q0V2lKpULHZQwVo5cSTxCKxLXsMu0L73zvOLlKL1EvPvXm+/N6bN/evCQ44oqaNkslHSOpiLk7zZdGUhWjP5stZWcxlWS5KOUNZLE6TD5CMSEIKEtz/msTHxpvgGpxii25U3iuj/ebYdP+ajEbG8mKRn5/vH/kPh41xcjOYRpBxh0lBfaSQ9WbEzAWt0WUvxj1lQXtyoaHgUP4Z4FaMdsATaRqf9ULL2pink9ymnNgm+z0jtWpA2tkInwhrB8WIzC571jI1FvV2HFrjRkH+xLStapBHQ9xIytjSOtMgy9HdOKRvlbiEQeguiA59FJig7pIoy3Jmo8NYY9Q0j/iEW4rY34XegZCj0ooFMIdnhDboJpLxECmgG3bA+5veEuodUI/Q8G4JXtAh2EEwPXhR1Cs9FS/RCkeRE5gWvgVPLIArzoSuP4w7nPTGeox/ONM5MaYAF6PR3ZT7KyvByjvNkGSmxrVWxNGKBKGHyjEe4oHb9I+1RukDHfYCfGA1z8of4ZteWEIH+QLoMOrTDKDqlYevR++A33w+gVGCltw8/RuoN07R7p2IhzCf5aUH1KIe2CLwTCwwtRSiRW+u3wh3UFZFK9iT90d/Wpb5PC+XizNRlzXOlohLgSXmQspCnBX/j/5fj/5T+kCr6qJaV7d3P+FyXV1freD2M1wAZ28ur2++QHW3XlVXVw/0u2WVogGT/eMv4ASdHw==" + } + }, + { + "type": "CompositeElement", + "element_id": "6af0314792236b9c87f69fadd2c49ebf", + "text": "The standing trustee has a fiduciary responsibility to the bankruptcy estate. The standing trustee is more than a mere disbursing agent. The standing trustee must be personally involved in the trustee operation. If the standing trustee is or becomes unable to perform the duties and responsibilities of a standing trustee, the standing trustee must immediately advise the United States Trustee. 28 U.S.C. \u00a7 586(b), 28 C.F.R. \u00a7 58.4(b) referencing 28 C.F.R. 
\u00a7 58.3(b).", + "metadata": { + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/handbook-1p.docx" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + }, + "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + "languages": [ + "eng" + ], + "page_number": 2, + "orig_elements": "eJx1Ustu2zAQ/JWFTi1gU/IjttprgAK99NA4pzQwVuTKJsyHwIdrw/C/d6nUQFK4Jz52Z3ZmyJdLRYYsubTVqvoK1bz9slbLvuuWs4VcN/jQrtu2b5oW16uVbOfVBCpLCRUm5P5LVTbb6HOQNJ4HClbHqL2L279NL5fKelXKi8Wsba+vzBFI+qC2xktMPrwhMe2LhHrvLdUhO0eh/u3Doc4uppBlyoHUxwOd0A6GpsrLWO/Rqc77w3Q2CL44VdcrT+q1oXQeyvgKh8Fonsjq6qNTwg/kTtb0PlhMcer7XktiaC6JCJ6thuAlsR23s0bcKiUEg26XcUexGKzI7apia+Cbrcu2o+JpXuYnOqUye7MniIklMhewg5iIYI8REHqtstQYzhAoDpyc7rTR6QzJQ2JYh+4Q8pDkGYgpEgmAu3Q6gvWBGISOeS3xXunY5VAcAItjW//BWl6gI+AHjN6hMWfQ7ujNkRRvRh23Vo4tjCEy1/d+LN2T4gPzSX7MCNlhZ6j4YWiJewSpnDQXGfnReLn0PRv4l3Vyf9YoXVtLSnM4LBzVUUcam5+dTuzgqcQWYfOGYN3zFp7Fk3gU8Cs3Da7hoV196j5PSuFRfBM/3xXEkisssec8nSyT7zUtuEmUr3H7bT8wlJiOtClf4Pr6B/mZL3k=" + } + }, + { + "type": "CompositeElement", + "element_id": "9db06205c9d90fede41d1b77d56d26d8", + "text": "Although this Handbook is not intended to be a complete statutory reference, the standing trustee\u2019s primary statutory duties are set forth in 11 U.S.C. \u00a7 1302, which incorporates by reference some of the duties of chapter 7 trustees found in 11 U.S.C. \u00a7 704. 
These duties include, but are not limited to, the following:\n\nCopyright", + "metadata": { + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/handbook-1p.docx" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + }, + "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + "languages": [ + "eng" + ], + "page_number": 2, + "orig_elements": "eJztUs1u2zAMfhXB59axnXZ2ehsKDDvtsu7UFoYsUbFQWRQkqk1R5N1HpcmWDtsb7CaKP98Pef9WgYMFPI1WVzei2nwy3VU3NRPIaehgbXRj+s0a+mEaho3pqgtRLUBSS5Jc/1aVx5gwRwWHOEBcbEoWfRqPRfdv1YK6pNfrdhj2jzwjgsKoR4dKEsb3TklzobCacYFVzN5DXL1gfFplnyhmRTmC/hjATi7BwaVGlVaz9HpCfLpsQ80fu2q/ZyRjHdBrKPCVDMFZRmR2q2evawzgd4szGBdJ6RKNsQq4NRdHasbWIaICluO3i6tPmWKCk36b5RZSEViB31ZFVuCf0edlgqKpK/gEOyrYnx3NmLezoNkm8fXIVfDbIwnrCbwGLQjFBEIKhUUYgUgkKbNHryKCgQhewQXPOCS8ZmKC7UgE8JC7pt0kEaJdJJf/btSZLCQhIzcBCZZLMyOKthU/6u/1bS0ectPIXrTrprsQL7NVJc8bChglcet0hi4S70egOZA4juZIzTIQRNGf+CQGyl7/Fahvrmoh7mZIv0YwnsuatU2ZDlSLLc4ulg6uvGs26By+sOibsoPTWr/JyDTtM9wVr9n0P6/6Sm+G5lpuejVMCpTqu3a6buXQGuiaaa3/X/XxqmeQGuJoEHmV4wngeFH/OPvzI7/F8Brtdqbz9Xw5TKv2jz8BszGI3w==" + } + } +] \ No newline at end of file diff --git a/test_unstructured_ingest/expected-structured-output/local-single-file-chunk-no-orig-elements/multi-column-2p.json b/test_unstructured_ingest/expected-structured-output/local-single-file-chunk-no-orig-elements/multi-column-2p.json new file mode 100644 index 0000000000..6117ef0b45 --- /dev/null +++ b/test_unstructured_ingest/expected-structured-output/local-single-file-chunk-no-orig-elements/multi-column-2p.json @@ -0,0 +1,611 @@ +[ + { + "type": "CompositeElement", + "element_id": "c4302b0c0305a44c9beac9927b20a25b", + "text": "0 2 0 2\n\np e S 0 3\n\n] L C . s c [\n\n3 v 6 0 9 4 0 . 
4 0 0 2 : v i X r a\n\nDense Passage Retrieval for Open-Domain Question Answering\n\nVladimir Karpukhin\u2217, Barlas O\u02d8guz\u2217, Sewon Min\u2020, Patrick Lewis, Ledell Wu, Sergey Edunov, Danqi Chen\u2021, Wen-tau Yih\n\nFacebook AI\n\n\u2020University of Washington\n\n\u2021Princeton University\n\n{vladk, barlaso, plewis, ledell, edunov, scottyih}@fb.com sewon@cs.washington.edu danqic@cs.princeton.edu\n\nAbstract", + "metadata": { + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/multi-column-2p.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + }, + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 1, + "orig_elements": "eJztV21v2zYQ/iuEPjs2Sb33U7Nmw4Z1a7e2y4Y2CI7k0RYskRop2UmD/PcdnaTNhg5LP8+ALVvk3cPDc8fnyPc3GfY4oJsuO5M9Y1mTW6uMaFWbi7JGXXJZV9oqzbnkthHZgmUDTmBgArK/ydKfy+jnoPHwPmIYuhg77+LlvdH7m2zwJk3nuWia2wvCCKh9MJe91zD5cOcJ0yaFsNr4AVdhdg7Dau/DdjW7OIVZT3NA8/cXvIJh7PHEeB1Xw9xP3Yn2/Ty4EzkuR2Oz21tazHY9TtdjiiCDcew7WpQCXCUDmu7BrWdYY0yxZujWWYpwpJFLNw8KU3gi4Ux4NSUMziSjb/J9gH3nCBPXPnQf0bxNduTwT3I55tbYqlK8xTZXVheFqfKyhRIrDoIfySWMkSF7Q/Tmj+l92009fonS1hglLJeiJV4L2ao6FxyIW6ll0QAcKSWMC/aSvWBLFpkmh6fQmueIwMsG8hqAZMAWSmk0JAvSWm7kkdZEEtuxiiq1ZQU9l4dnkoZnNN6x31lg8NUSgbKuRVXqpinqhheyUAUKYXNTQ6lBHes5YZyhi8heQ4xkw37FKXS4g55ZH9irEd3JmR+gc+yXGWNaiJ26uMfQEfJTih8rqBvRgi3RSrDSViWaxqJorBUVtv+fHPSd28a76OMEgQhyBq9SOppH6fgwSynqRYKZQ59GvrfeT85PuBTZ7X8k87ceTDd0gf0IYZy3m87d47FvIPQQ2asPM5emWc8fHybe4J6S+tPBko4lCyoFKgG9ZS9x38UF/Rjse3Y+J9Owxmv2rZmd3y3YGbg/O/Zig3euYsHOqVwmmNkf3earN6syZSV4YUr6cDRcm6oEMLZtBdQCj/08YXwHGpX3W3b6w5N2nzaFrmVpS62Mpn5TGJVLw9u2rXJKWX0k9W7LUeG/c90OQ+yma+YtO4dIm2c9efckniUvEBtFTYUO/A2vyrolhauLWrW2FaY48vzAs3hNrUMjEcs+M/4kjgXWUFQyz7WqpQBuoSp0BXmlkZqMPB6jEsbNjjrAdsHUQe79glEIBxnvDzK+YHgv3lH7abruNrfPrVpqP7CY+sBzHZf7T6W/JGNmksrrNDE+ZC6NP07ZzxAChb3Df9N2oUpBsq55o1BJuggLRReNXEJrLd3ZymPqCONUUQigpy9shou/AHbkcVc=" + } + }, + { + "type": "CompositeElement", + "element_id": 
"ee897debf0868ab1630051b7cfdcc1c5", + "text": "Open-domain question answering relies on ef- \ufb01cient passage retrieval to select candidate contexts, where traditional sparse vector space models, such as TF-IDF or BM25, are the de facto method. In this work, we show that retrieval can be practically implemented us- ing dense representations alone, where em- beddings are learned from a small number of questions and passages by a simple dual- encoder framework. When evaluated on a wide range of open-domain QA datasets, our dense retriever", + "metadata": { + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/multi-column-2p.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + }, + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 1, + "orig_elements": "eJxtU1lr3DAQ/iuDIW/xnjTZ9K2lhAZ6UAj0IQ3LWBqvRXS4GimbJex/74xzkJY+2ZJG33zH6OaxIU+BYtk627yHpjNLMmZz1l+szKo739jNxtrFZn22MusLWpw1p9AEKmixoNQ/Nvqz5VSzoWk9Ug6O2aXI2+eim8cmJKvH6/VyszneCkYmk7Ld+mSwpPx0E8ugFOZDCjTPNUbK833Kd/MaueRqSs1k/17QA4bRU2uT4XmovrjWJF9DbFfjbLR9czxKs955KodRGTQ4jt5JUyE41wI59hh3FXfEyrWhuGuUoXfxjp/Yc8EsBkVLD7JxvlzKcaGHongzRajZ6//nPqUSU6HZqplUjoK6jTV0pBKXxzf3vo8UhXZAF+F3JVZCgJH3lF3cQSbviEH2qG/hV+27xdI4yQlGZBZYqSjZ0T16KAlYUjQFDEbrxHUCk6J24lPYD5QJSkbrtIfU84iZCe7lRsq6MgQakJdqrmYAZLi+bK8+XYKcf/y6encKqBgDgSXoUe6BDMGQ7Ayuouw7Bg1KmhHwkPayheUNQ+EFHcGY5aqY7/0BnOamg0cWKregoi1FVmFjJpaTKSQG9CnSiw4KrSBZK+U8kfKEOQpGn1MABA6CDk+WQ+pfrZXiaF+8Y+gOWjtxAFvRt0DRiAVZcDCQipnBz4HEfuFfUVlqPrB34kCWgSFFT29C/PEBdN6Z1HR5D69qJhOUTS3yOPqUA2vzkpNI/lINRWonl4EPXCjALhMWsUhIXpy0y4sTwI5lqiVW6VNIAaR5SWO7WvxnHtCYKk4fTifNA/mRJ0IUbVtSKx8l+9xM7MHOOx4g0l5YidQ29a1k3crQq+jpWalR/6jtxLIhYL7j2VIfwcsL+4Y5S3T3dK2jfrz9Ay+ch50=" + } + }, + { + "type": "CompositeElement", + "element_id": "c65370dd6bdf4d3ed979f665912468e3", + "text": "outperforms a strong Lucene- BM25 system greatly by 9%-19% absolute in terms of top-20 passage retrieval accuracy, and helps our 
end-to-end QA system establish new state-of-the-art on multiple open-domain QA benchmarks.1", + "metadata": { + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/multi-column-2p.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + }, + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 1, + "orig_elements": "eJxtU1lr3DAQ/iuDIW/xnjTZ9K2lhAZ6UAj0IQ3LWBqvRXS4GimbJex/74xzkJY+2ZJG33zH6OaxIU+BYtk627yHpjNLMmZz1l+szKo739jNxtrFZn22MusLWpw1p9AEKmixoNQ/Nvqz5VSzoWk9Ug6O2aXI2+eim8cmJKvH6/VyszneCkYmk7Ld+mSwpPx0E8ugFOZDCjTPNUbK833Kd/MaueRqSs1k/17QA4bRU2uT4XmovrjWJF9DbFfjbLR9czxKs955KodRGTQ4jt5JUyE41wI59hh3FXfEyrWhuGuUoXfxjp/Yc8EsBkVLD7JxvlzKcaGHongzRajZ6//nPqUSU6HZqplUjoK6jTV0pBKXxzf3vo8UhXZAF+F3JVZCgJH3lF3cQSbviEH2qG/hV+27xdI4yQlGZBZYqSjZ0T16KAlYUjQFDEbrxHUCk6J24lPYD5QJSkbrtIfU84iZCe7lRsq6MgQakJdqrmYAZLi+bK8+XYKcf/y6encKqBgDgSXoUe6BDMGQ7Ayuouw7Bg1KmhHwkPayheUNQ+EFHcGY5aqY7/0BnOamg0cWKregoi1FVmFjJpaTKSQG9CnSiw4KrSBZK+U8kfKEOQpGn1MABA6CDk+WQ+pfrZXiaF+8Y+gOWjtxAFvRt0DRiAVZcDCQipnBz4HEfuFfUVlqPrB34kCWgSFFT29C/PEBdN6Z1HR5D69qJhOUTS3yOPqUA2vzkpNI/lINRWonl4EPXCjALhMWsUhIXpy0y4sTwI5lqiVW6VNIAaR5SWO7WvxnHtCYKk4fTifNA/mRJ0IUbVtSKx8l+9xM7MHOOx4g0l5YidQ29a1k3crQq+jpWalR/6jtxLIhYL7j2VIfwcsL+4Y5S3T3dK2jfrz9Ay+ch50=", + "is_continuation": true + } + }, + { + "type": "CompositeElement", + "element_id": "27d05f433deeece02f1087e7c184ca3c", + "text": "1\n\nIntroduction", + "metadata": { + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/multi-column-2p.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + }, + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 1, + "orig_elements": 
"eJztkElOAzEQRa8SeU3TU+iBG7APqyhqle1yx8KTPIhA1HfHDiABZ2Bnu75fPf3jlaBCjSYukpPHHRnm/TDSoW/6ZugQ2ETpDCOFuRFi7ERL7nZEYwQOEXL+SsphCTZ5hre7Q69lCNKasHyFjleiLS/jvm+naTtlhkdmPV+UZRCt//wJ8VwU6rPVWPtkDPr61fqXOpkQfWIxeeS/L3gB7RRW3LJQ66SirJhVSZuqc/eOC7JteZmQCuObKwYEnFMyL82CdQnksQKzJlgxFFeCZiXF0OWXxSRNsei1hRPxEgvj1sI38NlkGq7Wy3fkh5LI0b+1dlMvJiroAAPi/gHYzIe+5c0kOHRj0//XmhlPJnrLs0OG/Gz4IKNCsp0+AP3S5ao=" + } + }, + { + "type": "CompositeElement", + "element_id": "4e705a421caaeafb1db2ace440a2162d", + "text": "Open-domain question answering (QA) (Voorhees, 1999) is a task that answers factoid questions us- ing a large collection of documents. While early QA systems are often complicated and consist of multiple components (Ferrucci (2012); Moldovan et al. (2003), inter alia), the advances of reading comprehension models suggest a much simpli\ufb01ed two-stage framework: (1) a context retriever \ufb01rst selects a small subset of passages where some of them contain the answer to the question, and then (2) a", + "metadata": { + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/multi-column-2p.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + }, + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 1, + "orig_elements": 
"eJylVU1v4zYQ/SsDnWzAH5Jlr+PdU1Cg6KUtFijaw+7CoMWRRYQiVZJyYgT+732j2IUS5LS5iaPhmzdvPvjtOWPLLbu0Nzr7TNm63HJ+2G719m67Wamq3BzWhcrrclceit0dZzPKWk5Kq6Tg/5zJxz76PlQ8nDsOrYnReBf3V6dvz1nrtfwuy+Lu7vIDGIErH/Te+kolH15uqtQIhWXjW16G3jkOy0cfHpa9iyn0VeoD69cHflJtZ3mufRWXbW+TmVfe9q2br7pFp+vsckGw2lhO504YZKrrrEFQEFyKA35b5Y69OnIUrhm7YyYMrXEP8YV9TCpAIKf5SZLY4m/ipyRwf3sfGsZV2PpgxVSZxIvT1V7sdruEZDPweAu0HgOJ48+ArIpPI5RfOUCbyrxFqq/2VV6sjEvBaygICd5FXI0R5caH0Mp8hDb98har9Vb7k3KrPC/RO7UPrXLopfeg1qsRFJrQfgRsk7/KMi8/ALbZFSOwXxp2wo4Ws7eYFX5BtW1gpQ0a7R2sT/n6tfzbnwDZbsa9NabxW+19ch4wZTZMYofO37u+PbCMYXEZ3fuzY4fRapVx9G/PUWpMysVHDohLk6/3U5rcJmBG0qZTMpEUJRUfKDUqXd0j1apK3uj/cSL1cU4Co8iqcGTC4Foe+oh8TZjoXvZSXNA/DeaXWAV7pq/3FM8xcYsogeGY2OFm+zLUrBFP4+yiiUlghp2ADTH4eCeANLlNCU2kg6df6PdrqYlB2C7EnpfTGeglDrAYhUNqmJQ+SQ9Egb7KPyAHRk1k6ZFsOhsp9kcsFKCBQdVQNMLwe18f8gIk06Ofo2BIug6qZVlyn2lSTOEO7iI/0FMwfEL4l1sBYJFFINE3tspaBDlEHtLsVIyyweixYcgSsUHFDMrtgCgVHPgP1aDkh9OtFrNBNVgcMhcSraoa43hIEe6Vkts++P7YoAaydKV0PIDciOob9TigGQ2tTX0efLDrsb5uzUATmZCr1jOSVp4u6N4iBAIAEOtEhPWj9kPdQXrMSzyGXoNGZ7HMkbZTB9QaDwR64XhGXtT0UHk0vqT5GOT5kmKZeO0gDyGD5IBQXUCrmopXUn89vBUIJWk4Zj2UHtUM/iTma/ZIRGbs9sj8oQIYmBP/JZN0+fEfarF6Lw==" + } + }, + { + "type": "CompositeElement", + "element_id": "2fc384b06246b67d3c6407be5ec286a8", + "text": "machine reader can thoroughly exam- ine the retrieved contexts and identify the correct answer (Chen et al., 2017). 
Although reducing open-domain QA to machine reading is a very rea- sonable strategy, a huge performance degradation is often observed in practice2, indicating the needs of improving retrieval.", + "metadata": { + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/multi-column-2p.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + }, + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 1, + "orig_elements": "eJylVU1v4zYQ/SsDnWzAH5Jlr+PdU1Cg6KUtFijaw+7CoMWRRYQiVZJyYgT+732j2IUS5LS5iaPhmzdvPvjtOWPLLbu0Nzr7TNm63HJ+2G719m67Wamq3BzWhcrrclceit0dZzPKWk5Kq6Tg/5zJxz76PlQ8nDsOrYnReBf3V6dvz1nrtfwuy+Lu7vIDGIErH/Te+kolH15uqtQIhWXjW16G3jkOy0cfHpa9iyn0VeoD69cHflJtZ3mufRWXbW+TmVfe9q2br7pFp+vsckGw2lhO504YZKrrrEFQEFyKA35b5Y69OnIUrhm7YyYMrXEP8YV9TCpAIKf5SZLY4m/ipyRwf3sfGsZV2PpgxVSZxIvT1V7sdruEZDPweAu0HgOJ48+ArIpPI5RfOUCbyrxFqq/2VV6sjEvBaygICd5FXI0R5caH0Mp8hDb98har9Vb7k3KrPC/RO7UPrXLopfeg1qsRFJrQfgRsk7/KMi8/ALbZFSOwXxp2wo4Ws7eYFX5BtW1gpQ0a7R2sT/n6tfzbnwDZbsa9NabxW+19ch4wZTZMYofO37u+PbCMYXEZ3fuzY4fRapVx9G/PUWpMysVHDohLk6/3U5rcJmBG0qZTMpEUJRUfKDUqXd0j1apK3uj/cSL1cU4Co8iqcGTC4Foe+oh8TZjoXvZSXNA/DeaXWAV7pq/3FM8xcYsogeGY2OFm+zLUrBFP4+yiiUlghp2ADTH4eCeANLlNCU2kg6df6PdrqYlB2C7EnpfTGeglDrAYhUNqmJQ+SQ9Egb7KPyAHRk1k6ZFsOhsp9kcsFKCBQdVQNMLwe18f8gIk06Ofo2BIug6qZVlyn2lSTOEO7iI/0FMwfEL4l1sBYJFFINE3tspaBDlEHtLsVIyyweixYcgSsUHFDMrtgCgVHPgP1aDkh9OtFrNBNVgcMhcSraoa43hIEe6Vkts++P7YoAaydKV0PIDciOob9TigGQ2tTX0efLDrsb5uzUATmZCr1jOSVp4u6N4iBAIAEOtEhPWj9kPdQXrMSzyGXoNGZ7HMkbZTB9QaDwR64XhGXtT0UHk0vqT5GOT5kmKZeO0gDyGD5IBQXUCrmopXUn89vBUIJWk4Zj2UHtUM/iTma/ZIRGbs9sj8oQIYmBP/JZN0+fEfarF6Lw==", + "is_continuation": true + } + }, + { + "type": "CompositeElement", + "element_id": "7663d2b14e0314c98549f34edffe0a5d", + "text": "\u2217Equal contribution 1The code and trained models have been released at\n\nhttps://github.com/facebookresearch/DPR.\n\n2For instance, the exact match score on SQuAD v1.1 drops", + "metadata": { + 
"data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/multi-column-2p.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + }, + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 1, + "orig_elements": "eJztlMluHCEQhl8FcfZM03u3b5EcH60sc3OsUQHV08gsHZaJo9G8e0BJJCfyI/gGVPH/X1UJHi8UNRq08agkvSW06QfsFgBkrOkXhs08ibnlUowtnwaB9IZQgxEkRMj5F1oWx+CSz7Gy39AbFYJyNhz/JD1eqHGyhNu2nqbrU9bwKJyXR+0EROd/34S4FoRqdQYrn6xFX/1w/rlKNkSfREwe5b8bfAGzadxJJ0Jlko5qJ5xOxu6abb/JhV6v2WxRGuPPrRBQ2DatsmkGrEpCDmuwpwQnDIWVoj3RQrjlk6NNhmPBq4tOxJdYNL6lpqnHj98TaCKcjV7xVARJfVgxn0gkYCWJHpRFSUrxOpAVzkg4oiU+9xxCjkAs/n/RHsD7DHbGQ/HJhv8Pp2O8H2rORc9lx/p2aMeJsRFG3ktgyN6HkzXWGLdwW1UnFdfE98KZagGB3LlnjwHBi7W6+/Rl/7rzBxU1vtVxALH0yDs5iG7ol3lk88SaZWwF1PMixXvHy59x7zxRmQOswBsS8xvIICISA1GsJOTSkOTX8fVz+nBHzvW+JtK7LbwxgadfspmJMA==" + } + }, + { + "type": "CompositeElement", + "element_id": "e23130449343b46d18d6e94b959cfa63", + "text": "Retrieval in open-domain QA is usually imple- mented using TF-IDF or BM25 (Robertson and Zaragoza, 2009), which matches keywords ef\ufb01- ciently with an inverted index and can be seen as representing the question and context in high- dimensional, sparse vectors (with weighting). Con- versely, the dense, latent semantic encoding is com- plementary to sparse representations by design. 
For example, synonyms or paraphrases that consist of completely different tokens may still be mapped to vectors close", + "metadata": { + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/multi-column-2p.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + }, + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 1, + "orig_elements": "eJylVV1vGzcQ/CsLPdmATpZkObbbhyJp4CJAEqR2gABNAoPH29Ox4pEXfki+GP7vHfJ0rmzkoWjepONydmY4XH6+n7Dmlk24VdXkF5pc1PXqUq4uVgshpCjLi8VycXq+uqzLs7k8O19MpjRpOYhKBIH6+0n6cettdJLz/45dq7xX1vjbfdHn+0lrq7R8erq4uHj4CgzH0rrqVlspgnXDThGaROGksS2fuGgMu5OddZuTaHxwUYbouHr6h+9E22kuKiv9SRt1UIW0OramWHazrqonDw9oVivNoe8Sg4noOq3QFARPUgGWtTDrKNbsE9cJm/UkMdTKbPzA3gfhYJCp+A4fzldYDXwXEtwR/SWcWNvvgqa0nM8v6XhKu0bJphVBNuw33END5bn+Eutyng2MTqe9UgWeOVuyC96atLnDP1EqrXxQcgLqz3tfXB70Hjv/HOTlIWTa8XNwyMv8APCmcWorULIVwlRv1XPw928+3Czni9Xt2eny8sd4p/MnBBer/4NxfoDxR7RIsH4Os44WKC++RWGC+p4T8mOw1VPHFi/+E1LKVIeU3ZrYwtCE9HCAc83BKd4KTcqQ7dgg063A7z9fkvIUfRRa96Ry3CndWK7wVZk1fbwq3ry+Iuvo1bvlGR1djwdGsPwxn0M8x3TSPp405pPGgBYkFdDRa6dCAwgQ2gIP7bIFGVTic8nkmdHEk+POsceuRCc0TN8i+yR7KLYmqUzCGrVuCqoU+KcRIfSUfCecZ9qyxCDwdJS77hiFCe14Rr9bU2AZRbqfZvQKu3lKWgS0BIk2+SyJjbRVYgC/pG0L6obRJlxPwY6NHrnmc/FU9sDzam1mdAUL9xMFvHpjTd/65Ct2iq5xwsOw0IiQJHlcAbJ16oT6AHLQVdfsEqdgN+AIk3uCD1ons1qMHpgIJqNWqS0I4QMLnIiFNJflelWxe+rjl4hEyU+NTeLSSikqWsc+mapxfolJ+uyg3/+Wq6vxqPeHJYzfgV1FtbNtLh4PZgC/EVrQK7FBa/Qo0Zg2xu4M1ckB64IT/Xi+W2hK4bwR0dmUkPz17QGR60SEkGlt1/1sIEQvKeB5KEoYWcFgH7ilnY26okZsOfk3ZFBikvc4qXwlUk8foUOMhLMwDUVDEsZC3J0nmElzqbPBJQd0HkK/l7s3cM8sp25Y2GvbL6QA15y2DY65Au1kGKnM6HWmMGbPk3Boq70lzcKZTAARE9Xf0T9eD8YEqHJU62hkzuF4WBiwSDsCACSUhHxLKNuy5DuVRm/Icc6OCQrCbwrfsVR7654FfEafkrRckYdL0XJrcSPS40z5KS0oPaY+S81XPDuO6dAyaP1r7j5HlTX8OC3GWTFMolbcqTa2QMHbnaRUeKhxQYWDsqN3mNPH8GZtHSjhZh1hVM6mdPBGZA5vFR2leX78K2FUEwfsmeVPL46PZ2nYjo/5e+EcVG75Y5qiD1//AadLAvA=" + } + }, + { + "type": "CompositeElement", + "element_id": "0514dae8756684388e6b9f3ceef2f411", + 
"text": "to each other. Consider the question \u201cWho is the bad guy in lord of the rings?\u201d, which can be answered from the context \u201cSala Baker is best known for portraying the villain Sauron in the Lord of the Rings trilogy.\u201d A term-based system would have dif\ufb01culty retrieving such a context, while a dense retrieval system would be able to better match \u201cbad guy\u201d with \u201cvillain\u201d and fetch the cor- rect context. Dense encodings are also learnable by adjusting the embedding functions, which pro- vides", + "metadata": { + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/multi-column-2p.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + }, + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 1, + "orig_elements": "eJylVV1vGzcQ/CsLPdmATpZkObbbhyJp4CJAEqR2gABNAoPH29Ox4pEXfki+GP7vHfJ0rmzkoWjepONydmY4XH6+n7Dmlk24VdXkF5pc1PXqUq4uVgshpCjLi8VycXq+uqzLs7k8O19MpjRpOYhKBIH6+0n6cettdJLz/45dq7xX1vjbfdHn+0lrq7R8erq4uHj4CgzH0rrqVlspgnXDThGaROGksS2fuGgMu5OddZuTaHxwUYbouHr6h+9E22kuKiv9SRt1UIW0OramWHazrqonDw9oVivNoe8Sg4noOq3QFARPUgGWtTDrKNbsE9cJm/UkMdTKbPzA3gfhYJCp+A4fzldYDXwXEtwR/SWcWNvvgqa0nM8v6XhKu0bJphVBNuw33END5bn+Eutyng2MTqe9UgWeOVuyC96atLnDP1EqrXxQcgLqz3tfXB70Hjv/HOTlIWTa8XNwyMv8APCmcWorULIVwlRv1XPw928+3Czni9Xt2eny8sd4p/MnBBer/4NxfoDxR7RIsH4Os44WKC++RWGC+p4T8mOw1VPHFi/+E1LKVIeU3ZrYwtCE9HCAc83BKd4KTcqQ7dgg063A7z9fkvIUfRRa96Ry3CndWK7wVZk1fbwq3ry+Iuvo1bvlGR1djwdGsPwxn0M8x3TSPp405pPGgBYkFdDRa6dCAwgQ2gIP7bIFGVTic8nkmdHEk+POsceuRCc0TN8i+yR7KLYmqUzCGrVuCqoU+KcRIfSUfCecZ9qyxCDwdJS77hiFCe14Rr9bU2AZRbqfZvQKu3lKWgS0BIk2+SyJjbRVYgC/pG0L6obRJlxPwY6NHrnmc/FU9sDzam1mdAUL9xMFvHpjTd/65Ct2iq5xwsOw0IiQJHlcAbJ16oT6AHLQVdfsEqdgN+AIk3uCD1ons1qMHpgIJqNWqS0I4QMLnIiFNJflelWxe+rjl4hEyU+NTeLSSikqWsc+mapxfolJ+uyg3/+Wq6vxqPeHJYzfgV1FtbNtLh4PZgC/EVrQK7FBa/Qo0Zg2xu4M1ckB64IT/Xi+W2hK4bwR0dmUkPz17QGR60SEkGlt1/1sIEQvKeB5KEoYWcFgH7ilnY26okZsOfk3ZFBikvc4qXwlUk8foUOMhLMwDUVDEsZC3
J0nmElzqbPBJQd0HkK/l7s3cM8sp25Y2GvbL6QA15y2DY65Au1kGKnM6HWmMGbPk3Boq70lzcKZTAARE9Xf0T9eD8YEqHJU62hkzuF4WBiwSDsCACSUhHxLKNuy5DuVRm/Icc6OCQrCbwrfsVR7654FfEafkrRckYdL0XJrcSPS40z5KS0oPaY+S81XPDuO6dAyaP1r7j5HlTX8OC3GWTFMolbcqTa2QMHbnaRUeKhxQYWDsqN3mNPH8GZtHSjhZh1hVM6mdPBGZA5vFR2leX78K2FUEwfsmeVPL46PZ2nYjo/5e+EcVG75Y5qiD1//AadLAvA=", + "is_continuation": true + } + }, + { + "type": "CompositeElement", + "element_id": "d12417777d4a829f77bb4bb4b02078d7", + "text": "additional \ufb02exibility to have a task-speci\ufb01c representation. With special in-memory data struc- tures and indexing schemes, retrieval can be done ef\ufb01ciently using maximum inner product search (MIPS) algorithms (e.g., Shrivastava and Li (2014); Guo et al. (2016)).", + "metadata": { + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/multi-column-2p.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + }, + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 1, + "orig_elements": 
"eJylVV1vGzcQ/CsLPdmATpZkObbbhyJp4CJAEqR2gABNAoPH29Ox4pEXfki+GP7vHfJ0rmzkoWjepONydmY4XH6+n7Dmlk24VdXkF5pc1PXqUq4uVgshpCjLi8VycXq+uqzLs7k8O19MpjRpOYhKBIH6+0n6cettdJLz/45dq7xX1vjbfdHn+0lrq7R8erq4uHj4CgzH0rrqVlspgnXDThGaROGksS2fuGgMu5OddZuTaHxwUYbouHr6h+9E22kuKiv9SRt1UIW0OramWHazrqonDw9oVivNoe8Sg4noOq3QFARPUgGWtTDrKNbsE9cJm/UkMdTKbPzA3gfhYJCp+A4fzldYDXwXEtwR/SWcWNvvgqa0nM8v6XhKu0bJphVBNuw33END5bn+Eutyng2MTqe9UgWeOVuyC96atLnDP1EqrXxQcgLqz3tfXB70Hjv/HOTlIWTa8XNwyMv8APCmcWorULIVwlRv1XPw928+3Czni9Xt2eny8sd4p/MnBBer/4NxfoDxR7RIsH4Os44WKC++RWGC+p4T8mOw1VPHFi/+E1LKVIeU3ZrYwtCE9HCAc83BKd4KTcqQ7dgg063A7z9fkvIUfRRa96Ry3CndWK7wVZk1fbwq3ry+Iuvo1bvlGR1djwdGsPwxn0M8x3TSPp405pPGgBYkFdDRa6dCAwgQ2gIP7bIFGVTic8nkmdHEk+POsceuRCc0TN8i+yR7KLYmqUzCGrVuCqoU+KcRIfSUfCecZ9qyxCDwdJS77hiFCe14Rr9bU2AZRbqfZvQKu3lKWgS0BIk2+SyJjbRVYgC/pG0L6obRJlxPwY6NHrnmc/FU9sDzam1mdAUL9xMFvHpjTd/65Ct2iq5xwsOw0IiQJHlcAbJ16oT6AHLQVdfsEqdgN+AIk3uCD1ons1qMHpgIJqNWqS0I4QMLnIiFNJflelWxe+rjl4hEyU+NTeLSSikqWsc+mapxfolJ+uyg3/+Wq6vxqPeHJYzfgV1FtbNtLh4PZgC/EVrQK7FBa/Qo0Zg2xu4M1ckB64IT/Xi+W2hK4bwR0dmUkPz17QGR60SEkGlt1/1sIEQvKeB5KEoYWcFgH7ilnY26okZsOfk3ZFBikvc4qXwlUk8foUOMhLMwDUVDEsZC3J0nmElzqbPBJQd0HkK/l7s3cM8sp25Y2GvbL6QA15y2DY65Au1kGKnM6HWmMGbPk3Boq70lzcKZTAARE9Xf0T9eD8YEqHJU62hkzuF4WBiwSDsCACSUhHxLKNuy5DuVRm/Icc6OCQrCbwrfsVR7654FfEafkrRckYdL0XJrcSPS40z5KS0oPaY+S81XPDuO6dAyaP1r7j5HlTX8OC3GWTFMolbcqTa2QMHbnaRUeKhxQYWDsqN3mNPH8GZtHSjhZh1hVM6mdPBGZA5vFR2leX78K2FUEwfsmeVPL46PZ2nYjo/5e+EcVG75Y5qiD1//AadLAvA=", + "is_continuation": true + } + }, + { + "type": "CompositeElement", + "element_id": "e271222c042e3e3917223654512af009", + "text": "However, it is generally believed that learn- ing a good dense vector representation needs a large number of labeled pairs of question and con- texts. 
Dense retrieval methods have thus never be shown to outperform TF-IDF/BM25 for open- domain QA before ORQA (Lee et al., 2019), which proposes a sophisticated inverse cloze task (ICT) objective, predicting the blocks that contain the masked sentence, for additional pretraining. The question encoder and the reader model are then \ufb01ne- tuned using", + "metadata": { + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/multi-column-2p.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + }, + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 1, + "orig_elements": "eJyFU8tuGzEM/BViTwng19px6vSWNggaoA+08C0NDHnF9SrWSls97KSB/73DdVqkzaE3kSKHwyF5+1Sw5ZZdWhldvKViVs7W87IsF9V6Nj8/48n5dHoGazGf1LWazYsBFS0npVVSiH8q5LGKPoeKe7vj0JoYjXdx9Rx0+1S0Xsv3bFYuFoc7YASufNAr6yuVfDhmqtQIhXHjWx6H7ByH8d6H7Ti7mEKuUg6s/zb4QbWd5aH2VRy32SYzrLzNrRtOu1Gn6+JwQLHaWE6PnTAoVNdZg6IgOJYAfFvlNlltOArXgt2mEIbWuG08so9JBQjkND/AMZ1f4DvxQxK8j8yQw9JoIFA5WHFWJvHIMk8n5YVVCfIW4PEK6E35Akhi/wMhtDoQXbncrllUKw8vED74Pe84DMgkMpE2DAGVtY+0Zmvwoyk1KpFlFdyQjNuQoo33mjS7yLTjCqOgwF3giHq9RuSYdUSgVWHDdCxMvoYNVEB2yoQojh+ZY5+hnKbKo4LwiiO66tEDpwASUArr03hgNmrHYJQjaoA2WFJs/N5R8uRzwiLVPrS0vB7eXF2P332azgkO8h0DW/tWGUdfL5EGL9OXb3ifYBrEiZQdDUiUOx3QvjFVQ13wnY8snUTfNQZUsQTgbxxqg19l/U/QUXFLJzfvl6fk1/cQxOx4gGTWBm8olhqmNbZ2G49iotEkRMTfIhmIoh27CnlCV2ltRBY03okGCAbOiJZI+CMZwnEgoZdOkAIrMeVqLKkgMrGj77leT0rHUDY7FMpRGL0aQOxhlIt7NEb33rhkH0d0aSF73jRHpWKuKo6xzrIfmlsvVwVBntvS/8ysUu7lTGQYAzSaek0c7wmbnXjo6yGYDrHkyI04RxBz1N8lrvQ4uuPkZNN/n+RnFVAaSi9lkQ93vwBUSpQw" + } + }, + { + "type": "CompositeElement", + "element_id": "777143635df08e656b47821c0c8e9131", + "text": "pairs of questions and answers jointly. 
Although ORQA successfully demonstrates that dense retrieval can outperform BM25, setting new state-of-the-art results on multiple open-domain", + "metadata": { + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/multi-column-2p.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + }, + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 1, + "orig_elements": "eJyFU8tuGzEM/BViTwng19px6vSWNggaoA+08C0NDHnF9SrWSls97KSB/73DdVqkzaE3kSKHwyF5+1Sw5ZZdWhldvKViVs7W87IsF9V6Nj8/48n5dHoGazGf1LWazYsBFS0npVVSiH8q5LGKPoeKe7vj0JoYjXdx9Rx0+1S0Xsv3bFYuFoc7YASufNAr6yuVfDhmqtQIhXHjWx6H7ByH8d6H7Ti7mEKuUg6s/zb4QbWd5aH2VRy32SYzrLzNrRtOu1Gn6+JwQLHaWE6PnTAoVNdZg6IgOJYAfFvlNlltOArXgt2mEIbWuG08so9JBQjkND/AMZ1f4DvxQxK8j8yQw9JoIFA5WHFWJvHIMk8n5YVVCfIW4PEK6E35Akhi/wMhtDoQXbncrllUKw8vED74Pe84DMgkMpE2DAGVtY+0Zmvwoyk1KpFlFdyQjNuQoo33mjS7yLTjCqOgwF3giHq9RuSYdUSgVWHDdCxMvoYNVEB2yoQojh+ZY5+hnKbKo4LwiiO66tEDpwASUArr03hgNmrHYJQjaoA2WFJs/N5R8uRzwiLVPrS0vB7eXF2P332azgkO8h0DW/tWGUdfL5EGL9OXb3ifYBrEiZQdDUiUOx3QvjFVQ13wnY8snUTfNQZUsQTgbxxqg19l/U/QUXFLJzfvl6fk1/cQxOx4gGTWBm8olhqmNbZ2G49iotEkRMTfIhmIoh27CnlCV2ltRBY03okGCAbOiJZI+CMZwnEgoZdOkAIrMeVqLKkgMrGj77leT0rHUDY7FMpRGL0aQOxhlIt7NEb33rhkH0d0aSF73jRHpWKuKo6xzrIfmlsvVwVBntvS/8ysUu7lTGQYAzSaek0c7wmbnXjo6yGYDrHkyI04RxBz1N8lrvQ4uuPkZNN/n+RnFVAaSi9lkQ93vwBUSpQw", + "is_continuation": true + } + }, + { + "type": "CompositeElement", + "element_id": "75b661ed523e32a6b210c77ff6baa6b5", + "text": "from above 80% to less than 40% (Yang et al., 2019a).\n\nQA datasets, it also suffers from two weaknesses. First, ICT pretraining is computationally intensive and it is not completely clear that regular sentences are good surrogates of questions in the objective function. 
Second, because the context encoder is not \ufb01ne-tuned using pairs of questions and answers, the corresponding representations could be subop- timal.", + "metadata": { + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/multi-column-2p.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + }, + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 1, + "orig_elements": "eJzlUl1r3DAQ/CuLodDCffkuTc59K4VCXwqleSlpONbS+qJGltzVqpcQ7r9395JA0r/QN2s1mpmd8dVDQ5FGSrILvvkAjbug82616rr1hevXqxZbt+6w33Yr1w/dum1m0Iwk6FFQ8Q+NfexKruzodJ6Ix1BKyKnsnkBXD82YvV1vNu12e7xWDiaX2e9idiiZH1+i3JiF5U0eack1JeLlIfPtsqYiXJ1UJv/6QHc4TpHmPruyHGuUMHc51jHN19Ni8kNzPKrYECLJ/WQOGpymGFRUDS4NoNcR077inop5bSjtG3M46WSX6tiT2WuNR+hOjGPgPAL2+Q/BdvUGJEOkUkBuMMGZDt7+UEYgAYyLGWiKHb5bmNKzicsgkRql/Df+Dt/j2dlGo6dN/95pWr0/b4d2s3HUYzv8r/GvX8b/7SPYaoWkzCBYyiVDqcNAXODUjRwyHAhvk9ZCZQGfAxeZwZdPlzAxCWNIQRsKBVwepyonPxjjPYQklErQZjF5I1dMynLC6RakEBcJ2coWYNrXqIdC9sxRAWSCfc5e/TDnPYrO8gC/KxXTKCqgTwly/4ucmM5Qk7OrBXzXUpKfQU8Oa6ETTie2Nii7dsjPdn7WoV+1ieZSE3moxdaZUNd8rWZLYCoHTWb2xMdMZVIde8GkcZh5fIS7XKNXfXXf52kOEkb9hV/+uV+RGc33pZVxvP4Lus521A==" + } + }, + { + "type": "CompositeElement", + "element_id": "5c304e8ebe397be54afbaa377802bae4", + "text": "In this paper, we address the question: can we train a better dense embedding model using only pairs of questions and passages (or answers), with- out additional pretraining? By leveraging the now standard BERT pretrained model (Devlin et al., 2019) and a dual-encoder architecture (Bromley et al., 1994), we focus on developing the right training scheme using a relatively small number of question and passage pairs. 
Through a series of careful ablation studies, our \ufb01nal solution is surprisingly", + "metadata": { + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/multi-column-2p.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + }, + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 2, + "orig_elements": "eJydVl1v4zYQ/CsLAwc4gKXYTnK1rw9Fg2uBou19pHm7HgJaXNlsKFIlKTvuIf+9s5Tt2G0OAe7FsMTVcHZ2dslPXwZsuWGX7owevKFB9d3kshpfVTytax5fTPW8vppMZ7W+4tn49Ww6GNGg4aS0SgrxXwby5y76LlScn1sOjYnReBfvdkGfvgwar2X54mIymz1+Bkbgygd9Z32lkg/9lyqthML5yjd8HjrnOJxvfLg/71xMoatSF1ifPvCDalrLhfZVPG86m0xReds1rpi2ZavrweMjNquN5bRthcFAta012BQEzyUAy1a5ZaeWHIXrgN1yIAytcfexZx+TChDIaX7Ai+l0juXED0nw3vIakVDEClQXbFbRJC51XpmOJ7MFhzQAkf8hXV4eISFy/g0Ys+kRxvC/AIvgG8vbyXx+Gc3SKZHtWZj55AhG0qFy9O1gxwpJ+LcizU+y+435K8wss8hnVYKVnwWavyT1ixCT8Xh8hPHrxqh07zfx3nyF1P1ThCDnLGGTZ6Gn4xfofQ1MrNrCvHeua+ARUf/xCOoXR2llIrUKjTmiDZPSOnCMeM30d8dROuENVcrJYgrKOFK04JQ4kGYXmRjAWhu3JGljS12U/97ZLWBNiOTrA1Ik5TRexygNRUMf8CJuOMQz7G7SqiDfJSFhJBy6tYHzrsD8ga63ZHnNQS1lC6Ho/IYgltMqaLr+6eb28AHrHZ9h34PEwLXliESfs8xDke6ULdhVCASTUK2gZZ4dNLzuPXj4TMx4liWqfdUhK4f812x9u+cSzHKVaM+WYrXC6NzJoSgwvGPwwZZio6ylviLH6hyL02tX0u0q+G65AkDkYDirWanAdWdJLWyeVFCg01gbQbxAf3b1YjwR7SJmXV5HhWMX2mCEixAwMhbfZNZP5UOUb5NpzD/QrkZpGvUgT3lNxi209RqzNZM4NkgmjgR5rVw6ZLCGlD7EvrAIIb/4C6+gAVW+aVUoKEsDLV4ySu86lapVSe+R49vsvA+7jW5QcSO+QK0/3JxJIvxQcStAheAj4xS8W5b0S4JlUu9OOA2uR6JNpOvfp1e02GIXqwIgG/xi0+Hrq3L6itYoxOW0nL8SIre+La5IVRU6rNrCEgtxrI0eCkScMTu2sVuIMZNBISB38Ot8kIpvsuxOF8nDe5o+/nhA2wkD/ZOn9zdYGV5OyquewcVFefHqTNAFwLfs6F3f5/TxIFtEa4qqQ0zDE8t/T0cj6bQZek0r7yDjotvJjx5IG197q0v6GdVJ2fuaG9EUcmIQgodKfXWFEXKEnkcNwKlrR2K2glrovTMmF6lz+5551vq7hsw9xg8m5oT2kcVJg/TWrnvoyojAUG4ZWKWTCucC93ly05qAM94e6pWLF7slnJb6nJ4fQHDFNttngcHDrBnS/IGbitNZGxjQ1NsMMNpXSUTFrBVrS8FwE2lkhB6lLdMP2CNYZoUBAgFDb+d+78rE3ME4CsQVAR/IGRQlzcMXz5uplHkpF5ptP4FkHAYHeCXTrh+OQBGaybf7bfmp7foDAUORDy2rClpYmC/sz4C9huCYL1iYK8JB7naofzx2a/GU2M6loxO/xzza7QhA1Sqv5K
sYFuM2Jm5iKafe/qb2ToWQB+qtnGWPn/8FOBWoaA==" + } + }, + { + "type": "CompositeElement", + "element_id": "62da968f266c68f860bd5394735f6b71", + "text": "simple: the embedding is optimized for maximizing inner products of the question and relevant passage vectors, with an objective compar- ing all pairs of questions and passages in a batch. Our Dense Passage Retriever (DPR) is exception- ally strong. It not only outperforms BM25 by a large margin (65.2% vs. 42.9% in Top-5 accuracy), but also results in a substantial improvement on the end-to-end QA accuracy compared to ORQA (41.5% vs. 33.3%) in the open Natural Questions setting (Lee et al.,", + "metadata": { + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/multi-column-2p.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + }, + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 2, + "orig_elements": "eJydVl1v4zYQ/CsLAwc4gKXYTnK1rw9Fg2uBou19pHm7HgJaXNlsKFIlKTvuIf+9s5Tt2G0OAe7FsMTVcHZ2dslPXwZsuWGX7owevKFB9d3kshpfVTytax5fTPW8vppMZ7W+4tn49Ww6GNGg4aS0SgrxXwby5y76LlScn1sOjYnReBfvdkGfvgwar2X54mIymz1+Bkbgygd9Z32lkg/9lyqthML5yjd8HjrnOJxvfLg/71xMoatSF1ifPvCDalrLhfZVPG86m0xReds1rpi2ZavrweMjNquN5bRthcFAta012BQEzyUAy1a5ZaeWHIXrgN1yIAytcfexZx+TChDIaX7Ai+l0juXED0nw3vIakVDEClQXbFbRJC51XpmOJ7MFhzQAkf8hXV4eISFy/g0Ys+kRxvC/AIvgG8vbyXx+Gc3SKZHtWZj55AhG0qFy9O1gxwpJ+LcizU+y+435K8wss8hnVYKVnwWavyT1ixCT8Xh8hPHrxqh07zfx3nyF1P1ThCDnLGGTZ6Gn4xfofQ1MrNrCvHeua+ARUf/xCOoXR2llIrUKjTmiDZPSOnCMeM30d8dROuENVcrJYgrKOFK04JQ4kGYXmRjAWhu3JGljS12U/97ZLWBNiOTrA1Ik5TRexygNRUMf8CJuOMQz7G7SqiDfJSFhJBy6tYHzrsD8ga63ZHnNQS1lC6Ho/IYgltMqaLr+6eb28AHrHZ9h34PEwLXliESfs8xDke6ULdhVCASTUK2gZZ4dNLzuPXj4TMx4liWqfdUhK4f812x9u+cSzHKVaM+WYrXC6NzJoSgwvGPwwZZio6ylviLH6hyL02tX0u0q+G65AkDkYDirWanAdWdJLWyeVFCg01gbQbxAf3b1YjwR7SJmXV5HhWMX2mCEixAwMhbfZNZP5UOUb5NpzD/QrkZpGvUgT3lNxi209RqzNZM4NkgmjgR5rVw6ZLCGlD7EvrAIIb/4C6+gAVW+aVUoKEsDLV4ySu86lapVSe+R49vsvA+7jW5QcSO+QK0/3JxJIvxQcStAheAj4
xS8W5b0S4JlUu9OOA2uR6JNpOvfp1e02GIXqwIgG/xi0+Hrq3L6itYoxOW0nL8SIre+La5IVRU6rNrCEgtxrI0eCkScMTu2sVuIMZNBISB38Ot8kIpvsuxOF8nDe5o+/nhA2wkD/ZOn9zdYGV5OyquewcVFefHqTNAFwLfs6F3f5/TxIFtEa4qqQ0zDE8t/T0cj6bQZek0r7yDjotvJjx5IG197q0v6GdVJ2fuaG9EUcmIQgodKfXWFEXKEnkcNwKlrR2K2glrovTMmF6lz+5551vq7hsw9xg8m5oT2kcVJg/TWrnvoyojAUG4ZWKWTCucC93ly05qAM94e6pWLF7slnJb6nJ4fQHDFNttngcHDrBnS/IGbitNZGxjQ1NsMMNpXSUTFrBVrS8FwE2lkhB6lLdMP2CNYZoUBAgFDb+d+78rE3ME4CsQVAR/IGRQlzcMXz5uplHkpF5ptP4FkHAYHeCXTrh+OQBGaybf7bfmp7foDAUORDy2rClpYmC/sz4C9huCYL1iYK8JB7naofzx2a/GU2M6loxO/xzza7QhA1Sqv5KsYFuM2Jm5iKafe/qb2ToWQB+qtnGWPn/8FOBWoaA==", + "is_continuation": true + } + }, + { + "type": "CompositeElement", + "element_id": "29484619b37e237e06f4ac150c052fd0", + "text": "2019; Kwiatkowski et al., 2019). Our contributions are twofold. First, we demon- strate that with the proper training setup, sim- ply \ufb01ne-tuning the question and passage encoders on existing question-passage pairs is suf\ufb01cient to greatly outperform BM25. Our empirical results also suggest that additional pretraining may not be needed. Second, we verify that, in the context of open-domain question answering, a higher retrieval precision indeed translates to a higher end-to-end QA accuracy. 
By", + "metadata": { + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/multi-column-2p.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + }, + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 2, + "orig_elements": "eJydVl1v4zYQ/CsLAwc4gKXYTnK1rw9Fg2uBou19pHm7HgJaXNlsKFIlKTvuIf+9s5Tt2G0OAe7FsMTVcHZ2dslPXwZsuWGX7owevKFB9d3kshpfVTytax5fTPW8vppMZ7W+4tn49Ww6GNGg4aS0SgrxXwby5y76LlScn1sOjYnReBfvdkGfvgwar2X54mIymz1+Bkbgygd9Z32lkg/9lyqthML5yjd8HjrnOJxvfLg/71xMoatSF1ifPvCDalrLhfZVPG86m0xReds1rpi2ZavrweMjNquN5bRthcFAta012BQEzyUAy1a5ZaeWHIXrgN1yIAytcfexZx+TChDIaX7Ai+l0juXED0nw3vIakVDEClQXbFbRJC51XpmOJ7MFhzQAkf8hXV4eISFy/g0Ys+kRxvC/AIvgG8vbyXx+Gc3SKZHtWZj55AhG0qFy9O1gxwpJ+LcizU+y+435K8wss8hnVYKVnwWavyT1ixCT8Xh8hPHrxqh07zfx3nyF1P1ThCDnLGGTZ6Gn4xfofQ1MrNrCvHeua+ARUf/xCOoXR2llIrUKjTmiDZPSOnCMeM30d8dROuENVcrJYgrKOFK04JQ4kGYXmRjAWhu3JGljS12U/97ZLWBNiOTrA1Ik5TRexygNRUMf8CJuOMQz7G7SqiDfJSFhJBy6tYHzrsD8ga63ZHnNQS1lC6Ho/IYgltMqaLr+6eb28AHrHZ9h34PEwLXliESfs8xDke6ULdhVCASTUK2gZZ4dNLzuPXj4TMx4liWqfdUhK4f812x9u+cSzHKVaM+WYrXC6NzJoSgwvGPwwZZio6ylviLH6hyL02tX0u0q+G65AkDkYDirWanAdWdJLWyeVFCg01gbQbxAf3b1YjwR7SJmXV5HhWMX2mCEixAwMhbfZNZP5UOUb5NpzD/QrkZpGvUgT3lNxi209RqzNZM4NkgmjgR5rVw6ZLCGlD7EvrAIIb/4C6+gAVW+aVUoKEsDLV4ySu86lapVSe+R49vsvA+7jW5QcSO+QK0/3JxJIvxQcStAheAj4xS8W5b0S4JlUu9OOA2uR6JNpOvfp1e02GIXqwIgG/xi0+Hrq3L6itYoxOW0nL8SIre+La5IVRU6rNrCEgtxrI0eCkScMTu2sVuIMZNBISB38Ot8kIpvsuxOF8nDe5o+/nhA2wkD/ZOn9zdYGV5OyquewcVFefHqTNAFwLfs6F3f5/TxIFtEa4qqQ0zDE8t/T0cj6bQZek0r7yDjotvJjx5IG197q0v6GdVJ2fuaG9EUcmIQgodKfXWFEXKEnkcNwKlrR2K2glrovTMmF6lz+5551vq7hsw9xg8m5oT2kcVJg/TWrnvoyojAUG4ZWKWTCucC93ly05qAM94e6pWLF7slnJb6nJ4fQHDFNttngcHDrBnS/IGbitNZGxjQ1NsMMNpXSUTFrBVrS8FwE2lkhB6lLdMP2CNYZoUBAgFDb+d+78rE3ME4CsQVAR/IGRQlzcMXz5uplHkpF5ptP4FkHAYHeCXTrh+OQBGaybf7bfmp7foDAUORDy2rClpYmC/sz4C9huCYL1iYK8JB7naofzx2a/GU2M6loxO/xzza7QhA1Sqv5KsYFuM2Jm5iKafe/qb2ToWQB+qtnGWPn/8FOBWoaA==", + "is_continuation": true + } + }, + 
{ + "type": "CompositeElement", + "element_id": "c0729d3c185f8d0c401bbbad6bc11da0", + "text": "applying a modern reader model to the top retrieved passages, we achieve compara- ble or better results on multiple QA datasets in the open-retrieval setting, compared to several, much complicated systems.", + "metadata": { + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/multi-column-2p.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + }, + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 2, + "orig_elements": "eJydVl1v4zYQ/CsLAwc4gKXYTnK1rw9Fg2uBou19pHm7HgJaXNlsKFIlKTvuIf+9s5Tt2G0OAe7FsMTVcHZ2dslPXwZsuWGX7owevKFB9d3kshpfVTytax5fTPW8vppMZ7W+4tn49Ww6GNGg4aS0SgrxXwby5y76LlScn1sOjYnReBfvdkGfvgwar2X54mIymz1+Bkbgygd9Z32lkg/9lyqthML5yjd8HjrnOJxvfLg/71xMoatSF1ifPvCDalrLhfZVPG86m0xReds1rpi2ZavrweMjNquN5bRthcFAta012BQEzyUAy1a5ZaeWHIXrgN1yIAytcfexZx+TChDIaX7Ai+l0juXED0nw3vIakVDEClQXbFbRJC51XpmOJ7MFhzQAkf8hXV4eISFy/g0Ys+kRxvC/AIvgG8vbyXx+Gc3SKZHtWZj55AhG0qFy9O1gxwpJ+LcizU+y+435K8wss8hnVYKVnwWavyT1ixCT8Xh8hPHrxqh07zfx3nyF1P1ThCDnLGGTZ6Gn4xfofQ1MrNrCvHeua+ARUf/xCOoXR2llIrUKjTmiDZPSOnCMeM30d8dROuENVcrJYgrKOFK04JQ4kGYXmRjAWhu3JGljS12U/97ZLWBNiOTrA1Ik5TRexygNRUMf8CJuOMQz7G7SqiDfJSFhJBy6tYHzrsD8ga63ZHnNQS1lC6Ho/IYgltMqaLr+6eb28AHrHZ9h34PEwLXliESfs8xDke6ULdhVCASTUK2gZZ4dNLzuPXj4TMx4liWqfdUhK4f812x9u+cSzHKVaM+WYrXC6NzJoSgwvGPwwZZio6ylviLH6hyL02tX0u0q+G65AkDkYDirWanAdWdJLWyeVFCg01gbQbxAf3b1YjwR7SJmXV5HhWMX2mCEixAwMhbfZNZP5UOUb5NpzD/QrkZpGvUgT3lNxi209RqzNZM4NkgmjgR5rVw6ZLCGlD7EvrAIIb/4C6+gAVW+aVUoKEsDLV4ySu86lapVSe+R49vsvA+7jW5QcSO+QK0/3JxJIvxQcStAheAj4xS8W5b0S4JlUu9OOA2uR6JNpOvfp1e02GIXqwIgG/xi0+Hrq3L6itYoxOW0nL8SIre+La5IVRU6rNrCEgtxrI0eCkScMTu2sVuIMZNBISB38Ot8kIpvsuxOF8nDe5o+/nhA2wkD/ZOn9zdYGV5OyquewcVFefHqTNAFwLfs6F3f5/TxIFtEa4qqQ0zDE8t/T0cj6bQZek0r7yDjotvJjx5IG197q0v6GdVJ2fuaG9EUcmIQgodKfXWFEXKEnkcNwKlrR2K2glrovTMmF6lz+5551vq7hsw9xg8m5oT2kcVJg/TWrnvoyojAUG4ZWKWTCucC93ly05qAM94e6pWLF7slnJ
b6nJ4fQHDFNttngcHDrBnS/IGbitNZGxjQ1NsMMNpXSUTFrBVrS8FwE2lkhB6lLdMP2CNYZoUBAgFDb+d+78rE3ME4CsQVAR/IGRQlzcMXz5uplHkpF5ptP4FkHAYHeCXTrh+OQBGaybf7bfmp7foDAUORDy2rClpYmC/sz4C9huCYL1iYK8JB7naofzx2a/GU2M6loxO/xzza7QhA1Sqv5KsYFuM2Jm5iKafe/qb2ToWQB+qtnGWPn/8FOBWoaA==", + "is_continuation": true + } + }, + { + "type": "CompositeElement", + "element_id": "8b17b1f0904845bb412dd8aa76d2602f", + "text": "2 Background\n\nThe problem of open-domain QA studied in this paper can be described as follows. Given a factoid question, such as \u201cWho \ufb01rst voiced Meg on Family Guy?\u201d or \u201cWhere was the 8th Dalai Lama born?\u201d, a system is required to answer it using a large corpus of diversi\ufb01ed topics. More speci\ufb01cally, we assume", + "metadata": { + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/multi-column-2p.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + }, + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 2, + "orig_elements": "eJztkstu2zAQRX9loLUdW5IfcjdFi6LZNAUKBOgiDYwROZKI8BU+7BiG/73DxC3afkN3pObOnTOXejhXpMmQTXslq3dQrVvqNrvVtt0tV5ut7OS6q+sW+3Zbr1c9DtUMKkMJJSZk/bkqh310OQh6vXsKRsWonI37q+jhXBknS7lt6667PLJHIOGC3GsnMLnw1olpKgiLyRlahGwthcXRhadFtjGFLFIOJP++0Asar2kunYgLk3VSc+F0Nnbe+Bsvh+py4WGD0pROvhBU6L1WPJQBF0XAZY12zDhSLKwV2bEqhJ6/7G02PRW8pvgkeknFo4GPKJ7G4LKVxeCX971KmipW/puqbJab9a5rkESPUmy6el0LxO1uM7Qdbfv/qZb0JgIfXM/JgRvAebJMYFBZ+PYBYspSkQS+pUlF8MiZgEALPYGkKILquYwRBqe1O8YbuFUHsoAwoEhOSXjOFAvhDGIWU5H+yM2yFt8nx6ehX9YhJjg4JdjojkZwFj6jUfoEt/n0/lUswYXfbRQIjmyTmLxLE3xCjQq+oEHoXbDXjhkjxFNMvBZzB3rOikOG5ABtPPISKkGOyo6s0xhGAn5En2MJQfIKIao3utcmrwSvdud4dPQkriWBWp9mcCTeKmZDf/6VXzEEfpkD3ZegL48/AcqLVps=" + } + }, + { + "type": "CompositeElement", + "element_id": "560469b1d37a074b47b3cdab32c07896", + "text": "the extractive QA setting, in which the answer is restricted to a span appearing in one or more pas- sages in the corpus. 
Assume that our collection contains D documents, d1, d2, \u00b7 \u00b7 \u00b7 , dD. We \ufb01rst split each of the documents into text passages of equal lengths as the basic retrieval units3 and get M total passages in our corpus C = {p1, p2, . . . , pM }, where each passage pi can be viewed as a sequence 2 , \u00b7 \u00b7 \u00b7 , w(i) 1 , w(i) of tokens w(i) |pi|. Given a question q, the task is to \ufb01nd a", + "metadata": { + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/multi-column-2p.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + }, + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 2, + "orig_elements": "eJydVU1vGzcQ/SuDPVmotPp0/AH0EDhIe0mAFgV8SAOD4s5ahLjkhh+WVdv/vW+4kisHaQ+FDyaX5Myb996MvjxVbLljl+5MU11T1S6V1uezuW6a87lerS5W63fNleYlr1er2cVFNaaq46QalRTuP1WyuIs+B81l33PoTIzGu3h3uPTlqep8I8fL5fzy8uUrYgTWPjR31muVfBheqrQRCNON73gasnMcpjsfttPsYgpZpxy4ebvhR9X1lieN13HaZZvMRHubOzdZ9HXftNXLC5K1xnLa94KgUn1vDZIC4FQu4Ngqd5/VPUfBWrG7rwShNW4bB/QxqQCCXMOPUsRqjuPEj0ni9QsJkYOVza+t98n5xPWqQuLvX14t3528vNmwA5OW6vFJCG3wWuNoMZtfBFaNAZ4fxVpdncSSy/8nyPnyJMio/mEp51VRrAdDdy53axa5Fi8nD9OGCcugdDIPTL+9p8gpIeeYjKPdxugNyR3l4o4DmUiBIaLRiRtKnhTFXjmCNKwCnskr75h8oM4Hpl7FCUVRSE4kEszT51jT+xhzx/ikEsGD+G4taxEXS5eUcZE+ENyRxeNxTA20axZj+jPPZuuL7/7h6ENNt4x9u57NQ0wAZk0iVqjAtyX1azBgAXbhQAAO8HCHv2VoamGjtImkYnm0VtFoVI2i+QHH2ZkUlyCkoXtO9AksiBNe4wgBpRwpk27oZ3rqgbwH8rr8Yf2JoMFuwyCo4Ds8pt6QBptrpgfDOzAMDGAYuNhppgX9a/W7MzOi+XEh9fotg8Gyfe7Nc02/QGAoRQgWC83fxqXApOJWhAUjA3mo7CBreR1/mv932nhctMF3g/oD36+coK6isxR3cJKcH5HU9Nknow9mAA7tH3BF0c40oALGwgyQoI3vxBfjEyNRNH9xCcwQyu4pYCTwAKUz1so0G57qPKFB/TOu7+sx3Zqt6bkxaiQ518fLh1NJccvrkTgVUGB7zKgx4ANJz24yYCkds4+JO3LMTWHROG1zIz1D3A6UaoPERxOxuKPrQRS+vfISeULSAMJ9p6yVRhTgAVP+QclN2BWlr7mVxpJhuJeGE5wyJ4RTf+zl0549k2lFCKYsypKxMqpX9NEHybKH0Ky2peHVCcDf6ZrOYJCbEcRezK8WdPNRTKKozW7o0kEsteXSKsb1Ob1xV2kR9U8jyBYJcnASpsvwfSkU2QaSbMLyUHUpVnIi++IS2Y/98oxvz2iqLZ1p01wvLmHvG7gb9SDqEOgRnbN9W8+hraSDs5LZ
BelM9Lb8mpBU4/vJ9qTN8VuaweR+fBiCZpgGbZmU8qQtpU4oFdNAk8NF4HudX4c2KkwNasS3xpepffx5+6xCUDKF/5DZ/PL1b1GQn/0=" + } + }, + { + "type": "CompositeElement", + "element_id": "c97a9ab7aa86ce7ae1b0e5d718fdeee0", + "text": "span w(i) s+1, \u00b7 \u00b7 \u00b7 , w(i) s , w(i) from one of the passages pi that can answer the question. Notice that to cover a wide variety of domains, the corpus size can easily range from millions of docu- ments (e.g., Wikipedia) to billions (e.g., the Web). As a result, any open-domain QA system needs to include an ef\ufb01cient retriever component that can se- lect a small set of relevant texts, before applying the reader to extract the answer (Chen et al., 2017).4 Formally speaking, a retriever R : (q,", + "metadata": { + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/multi-column-2p.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + }, + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 2, + "orig_elements": 
"eJydVU1vGzcQ/SuDPVmotPp0/AH0EDhIe0mAFgV8SAOD4s5ahLjkhh+WVdv/vW+4kisHaQ+FDyaX5Myb996MvjxVbLljl+5MU11T1S6V1uezuW6a87lerS5W63fNleYlr1er2cVFNaaq46QalRTuP1WyuIs+B81l33PoTIzGu3h3uPTlqep8I8fL5fzy8uUrYgTWPjR31muVfBheqrQRCNON73gasnMcpjsfttPsYgpZpxy4ebvhR9X1lieN13HaZZvMRHubOzdZ9HXftNXLC5K1xnLa94KgUn1vDZIC4FQu4Ngqd5/VPUfBWrG7rwShNW4bB/QxqQCCXMOPUsRqjuPEj0ni9QsJkYOVza+t98n5xPWqQuLvX14t3528vNmwA5OW6vFJCG3wWuNoMZtfBFaNAZ4fxVpdncSSy/8nyPnyJMio/mEp51VRrAdDdy53axa5Fi8nD9OGCcugdDIPTL+9p8gpIeeYjKPdxugNyR3l4o4DmUiBIaLRiRtKnhTFXjmCNKwCnskr75h8oM4Hpl7FCUVRSE4kEszT51jT+xhzx/ikEsGD+G4taxEXS5eUcZE+ENyRxeNxTA20axZj+jPPZuuL7/7h6ENNt4x9u57NQ0wAZk0iVqjAtyX1azBgAXbhQAAO8HCHv2VoamGjtImkYnm0VtFoVI2i+QHH2ZkUlyCkoXtO9AksiBNe4wgBpRwpk27oZ3rqgbwH8rr8Yf2JoMFuwyCo4Ds8pt6QBptrpgfDOzAMDGAYuNhppgX9a/W7MzOi+XEh9fotg8Gyfe7Nc02/QGAoRQgWC83fxqXApOJWhAUjA3mo7CBreR1/mv932nhctMF3g/oD36+coK6isxR3cJKcH5HU9Nknow9mAA7tH3BF0c40oALGwgyQoI3vxBfjEyNRNH9xCcwQyu4pYCTwAKUz1so0G57qPKFB/TOu7+sx3Zqt6bkxaiQ518fLh1NJccvrkTgVUGB7zKgx4ANJz24yYCkds4+JO3LMTWHROG1zIz1D3A6UaoPERxOxuKPrQRS+vfISeULSAMJ9p6yVRhTgAVP+QclN2BWlr7mVxpJhuJeGE5wyJ4RTf+zl0549k2lFCKYsypKxMqpX9NEHybKH0Ky2peHVCcDf6ZrOYJCbEcRezK8WdPNRTKKozW7o0kEsteXSKsb1Ob1xV2kR9U8jyBYJcnASpsvwfSkU2QaSbMLyUHUpVnIi++IS2Y/98oxvz2iqLZ1p01wvLmHvG7gb9SDqEOgRnbN9W8+hraSDs5LZBelM9Lb8mpBU4/vJ9qTN8VuaweR+fBiCZpgGbZmU8qQtpU4oFdNAk8NF4HudX4c2KkwNasS3xpepffx5+6xCUDKF/5DZ/PL1b1GQn/0=", + "is_continuation": true + } + }, + { + "type": "CompositeElement", + "element_id": "f43de53cd34b16adb5018883c5686a39", + "text": "C) \u2192 CF is a function that takes as input a question q and a corpus C and returns a much smaller \ufb01lter set of texts CF \u2282 C, where |CF | = k (cid:28) |C|. 
For a \ufb01xed k, a retriever can be evaluated in isolation on top-k retrieval accuracy, which is the fraction of ques- tions for which CF contains a span that answers the question.", + "metadata": { + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/multi-column-2p.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + }, + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 2, + "orig_elements": "eJydVU1vGzcQ/SuDPVmotPp0/AH0EDhIe0mAFgV8SAOD4s5ahLjkhh+WVdv/vW+4kisHaQ+FDyaX5Myb996MvjxVbLljl+5MU11T1S6V1uezuW6a87lerS5W63fNleYlr1er2cVFNaaq46QalRTuP1WyuIs+B81l33PoTIzGu3h3uPTlqep8I8fL5fzy8uUrYgTWPjR31muVfBheqrQRCNON73gasnMcpjsfttPsYgpZpxy4ebvhR9X1lieN13HaZZvMRHubOzdZ9HXftNXLC5K1xnLa94KgUn1vDZIC4FQu4Ngqd5/VPUfBWrG7rwShNW4bB/QxqQCCXMOPUsRqjuPEj0ni9QsJkYOVza+t98n5xPWqQuLvX14t3528vNmwA5OW6vFJCG3wWuNoMZtfBFaNAZ4fxVpdncSSy/8nyPnyJMio/mEp51VRrAdDdy53axa5Fi8nD9OGCcugdDIPTL+9p8gpIeeYjKPdxugNyR3l4o4DmUiBIaLRiRtKnhTFXjmCNKwCnskr75h8oM4Hpl7FCUVRSE4kEszT51jT+xhzx/ikEsGD+G4taxEXS5eUcZE+ENyRxeNxTA20axZj+jPPZuuL7/7h6ENNt4x9u57NQ0wAZk0iVqjAtyX1azBgAXbhQAAO8HCHv2VoamGjtImkYnm0VtFoVI2i+QHH2ZkUlyCkoXtO9AksiBNe4wgBpRwpk27oZ3rqgbwH8rr8Yf2JoMFuwyCo4Ds8pt6QBptrpgfDOzAMDGAYuNhppgX9a/W7MzOi+XEh9fotg8Gyfe7Nc02/QGAoRQgWC83fxqXApOJWhAUjA3mo7CBreR1/mv932nhctMF3g/oD36+coK6isxR3cJKcH5HU9Nknow9mAA7tH3BF0c40oALGwgyQoI3vxBfjEyNRNH9xCcwQyu4pYCTwAKUz1so0G57qPKFB/TOu7+sx3Zqt6bkxaiQ518fLh1NJccvrkTgVUGB7zKgx4ANJz24yYCkds4+JO3LMTWHROG1zIz1D3A6UaoPERxOxuKPrQRS+vfISeULSAMJ9p6yVRhTgAVP+QclN2BWlr7mVxpJhuJeGE5wyJ4RTf+zl0549k2lFCKYsypKxMqpX9NEHybKH0Ky2peHVCcDf6ZrOYJCbEcRezK8WdPNRTKKozW7o0kEsteXSKsb1Ob1xV2kR9U8jyBYJcnASpsvwfSkU2QaSbMLyUHUpVnIi++IS2Y/98oxvz2iqLZ1p01wvLmHvG7gb9SDqEOgRnbN9W8+hraSDs5LZBelM9Lb8mpBU4/vJ9qTN8VuaweR+fBiCZpgGbZmU8qQtpU4oFdNAk8NF4HudX4c2KkwNasS3xpepffx5+6xCUDKF/5DZ/PL1b1GQn/0=", + "is_continuation": true + } + }, + { + "type": "CompositeElement", + "element_id": "3d496ab9358cea4487f3f6c065be10a3", + 
"text": "e\n\n3 Dense Passage Retriever (DPR)", + "metadata": { + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/multi-column-2p.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + }, + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 2, + "orig_elements": "eJztUMtOwzAQ/JXIJ5AoSeOQB+d+QFVxq6rIsdethV+y11BU9d+xeRxAfAK33Z3Z2dnZXwhoMGBxVoI8VoSJbphkw+XSUQENHde9XKgc1uPEloX15K4iBpAJhizzL6QUc3QpcPjoPQSjYlTOxvmLtL8Q40SBKV2P4/WQNQJwF8SsHWfowucmw1OxUJ+cgTokayHUry4818lGDIljCiB+NnBmxmtYCcdjbZJGteJOJ2NXrb/3QpLrNR+TSgO+efh40Hut8tFssC6EDGtmj4kdIRavBOyRFIc+T2abzALFXlt0EM5YNKBsfQs+KdRAMvw7ym5oYWrkMI1Ds3QtH2Xb0QeZh33PKZX/UWYNWm3ARqi2LMbMqnaAQcELhOpms93d/hH04R2GWuXv" + } + }, + { + "type": "CompositeElement", + "element_id": "314aad20404c8e22b3d66e538dd6f99e", + "text": "We focus our research in this work on improv- ing the retrieval component in open-domain QA. Given a collection of M text passages, the goal of our dense passage retriever (DPR) is to index all the passages in a low-dimensional and continuous space, such that it can retrieve ef\ufb01ciently the top k passages relevant to the input question for the reader at run-time. 
Note that M can be very large (e.g., 21 million passages in our experiments, de- scribed in Section 4.1) and k is usually small, such", + "metadata": { + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/multi-column-2p.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + }, + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 2, + "orig_elements": "eJxVUktv2kAQ/isjnxIJGxsIhd4qVeopUV9SD0mEht0xrNhX90FAiP/eWQNterLXM/u9/D2fKtJkyKaVktVHqPoPXTsV/WT5gN18OcV2sehmy3bxMFt3M5w/VCOoDCWUmJD3T1V5WUWXg6Dh7CkYFaNyNq6uS8+nyjhZxtNpt1icXxkjkHBBrrQTmFy43MS0LRLGW2doHLK1FMZvLuzG2cYUskg5kPz/QAc0XlMtnYhjk3VStXA6G1tPfONlX53PTNYrTenoi4IKvdeKSVnguCzwWKPdZNxQLForspuqKNTK7uJFfUwYOCAr6cAfZvM5jxMdUsFroCsYOehyinkdSRTwZtZ01WDVM/TKZrOm4nNyfnf5F0HvRI7A+UGgSBjEFpSFtFURindwFpTxwe1r/r7hAfFiCor2qEE4453lv1fuOE+WgzDI798+NfBF7ckC8pLWF03geniEQg4eYyyWRwPixjEYD4sMSTbSbX7jogB3n79+vweWlRwMUQBqPdy+YRURCNq91VJxpUoHGBatZAk2KZsdO40eBY0gZjaatsjKEwi0f4mA+pfcr9tOKPaljwNDch52/3gCd3aP7JqllLGyPif4nSkOLnsXrjmhZOHMwWWqE2tq4MkluvA+DrRrAjZ3BI2B3d5Rs2lGMOnAKK0L1ntvJR06cMGLu8TRSaohiqDWJMv8xzVl/vP3g+1diSvHzEEdIRp+XI1jhEn7kidtN+3atikFuvXzCUPgdu7pZ+nI+fUPI9Q8Qw==" + } + }, + { + "type": "CompositeElement", + "element_id": "cd5d190ae1e89f9c4ea1ccdb904b3699", + "text": "as 20\u2013100.", + "metadata": { + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/multi-column-2p.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + }, + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 2, + "orig_elements": 
"eJxVUktv2kAQ/isjnxIJGxsIhd4qVeopUV9SD0mEht0xrNhX90FAiP/eWQNterLXM/u9/D2fKtJkyKaVktVHqPoPXTsV/WT5gN18OcV2sehmy3bxMFt3M5w/VCOoDCWUmJD3T1V5WUWXg6Dh7CkYFaNyNq6uS8+nyjhZxtNpt1icXxkjkHBBrrQTmFy43MS0LRLGW2doHLK1FMZvLuzG2cYUskg5kPz/QAc0XlMtnYhjk3VStXA6G1tPfONlX53PTNYrTenoi4IKvdeKSVnguCzwWKPdZNxQLForspuqKNTK7uJFfUwYOCAr6cAfZvM5jxMdUsFroCsYOehyinkdSRTwZtZ01WDVM/TKZrOm4nNyfnf5F0HvRI7A+UGgSBjEFpSFtFURindwFpTxwe1r/r7hAfFiCor2qEE4453lv1fuOE+WgzDI798+NfBF7ckC8pLWF03geniEQg4eYyyWRwPixjEYD4sMSTbSbX7jogB3n79+vweWlRwMUQBqPdy+YRURCNq91VJxpUoHGBatZAk2KZsdO40eBY0gZjaatsjKEwi0f4mA+pfcr9tOKPaljwNDch52/3gCd3aP7JqllLGyPif4nSkOLnsXrjmhZOHMwWWqE2tq4MkluvA+DrRrAjZ3BI2B3d5Rs2lGMOnAKK0L1ntvJR06cMGLu8TRSaohiqDWJMv8xzVl/vP3g+1diSvHzEEdIRp+XI1jhEn7kidtN+3atikFuvXzCUPgdu7pZ+nI+fUPI9Q8Qw==", + "is_continuation": true + } + }, + { + "type": "CompositeElement", + "element_id": "2e192dac77fe2cc83b3fc71c21ff8beb", + "text": "3.1 Overview\n\nOur dense passage retriever (DPR) uses a dense encoder EP (\u00b7) which maps any text passage to a d- dimensional real-valued vectors and builds an index for all the M passages that we will use for retrieval.", + "metadata": { + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/multi-column-2p.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + }, + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 2, + "orig_elements": 
"eJztkstu2zAQRX9lwFUCVJZkyZbcdbtsExTZpYEwIkcWUT4EPiwHhv+9ZOoUbb+hO47mzpnLSz1fGCnSZMIgBfsIbIvYN/wwdW11EG0nCGm/60XHqy2v9rVgH4BpCigwYNJfWD4M3kbH6a1eyGnpvbTGDzfR84VpK3K7aeq+v74khiNunRiU5Ris+zWJYc4WytlqKl00hly5WvejjMYHF3mIjsTfBZ1RL4oKYbkvdVRBFtyqqE2xXTaLmNj1mpZNUlF4XbIDhsuiZFqaDJZZkNoKzTHikXz2ysgcWXa4pC+DiXqkbG+bOYHOITOaTQ0PJ3InSWsGvLOfZFDEkvLfVNuxp27kh1Fg3fXTbmq6w9RznFq+79td/T/VxHiIDgQZT7Cg90kHjoKTlIKGu0+P3+4hevKANxEZnq7v4PMj3H2PVTV297DOks+gcUky8wqZ/BsWbB4tQMj0MDlJVGkBquKEKpKAE/GUWR4UMEapRD6CNILOMFkHqBSEmeDLO9GnEgOsBKtMvWTuTXczjWrz56/xFZ1L8ZzoKd/2+vIT76ss9A==" + } + }, + { + "type": "CompositeElement", + "element_id": "f7866091872501fa5032475be4da87df", + "text": "3The ideal size and boundary of a text passage are func- tions of both the retriever and reader. We also experimented with natural paragraphs in our preliminary trials and found that using \ufb01xed-length passages performs better in both retrieval and \ufb01nal QA accuracy, as observed by Wang et al. (2019).\n\n4Exceptions include (Seo et al., 2019) and (Roberts et al., 2020), which retrieves and generates the answers, respectively.", + "metadata": { + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/multi-column-2p.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + }, + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 2, + "orig_elements": 
"eJzlU01v2zAM/SuETy2QD1txm2S3HXYdsK1AD10RyBIdC5Ulg5LysSL/fVScDWkPG3bezTTJx8enx6fXAi326OLG6OIDFK0QYqnru+YesRbVsm7EUi61WIsKUeimmEDRY5RaRsn1r0X+2ASfSOE5HpB6E4LxLmwuRU+vRe91Ti8W1Wp1emYMQuVJb6xXMnoaO2XsMoV553ucU3IOab739DJPLkRKKiZC/TbAg+wHi1PtVZj3yUYzVd6m3k3FMBt0W5xOPKw1FuNxyAwKOQzW8FAmOM8FnLbSbZPcYshcC3TbIjMc+M/Gpb7BTE9knIiHmDEWDx2C0SgtBPMDQToNjU9OSzqCb0FCroRBhsAgIAmhTU5NIU8NuaLxsYPIKISRDO6QziCEUiPN4JGbbPCAB5bT5OdBDXvDPU7y4jx3kCS3JIcugHHA8sNAaE1vXObAmNx/hmwzLx4lI6Rg3Ba+p7YpqwPqqeVVGfJCMwDPaj31ARqMkRkx8JnnhSNPzYBjv+Poy0eQSjEddZyA5L2agLRjps0RHllTwMhrzOBGlNX6dpal/vUKnyURv8EOH7KkrO17Hy6ahVg19+W6rNfrSjd1uVLl/V1Vq1asS1z+Pz60xr2EkX2Iklggp/GQLVldWfIbelbDwmySkRLZ/FOZiLOAPuvP1rKj0G9RFosrlFz4j/11fdX/1fO1xJCZvIehMSVKUfZJdcXpLzdWfzooHMaDMU7ZpBFueMuLqSZwNtXZkTeXsVcpUd5OYN8Z9du9OJ7DFvk1ZeQoX590YY8UJlwUBlTZkPb4J6M+/wSi9rz4" + } + } +] \ No newline at end of file diff --git a/test_unstructured_ingest/expected-structured-output/local-single-file-chunk-no-orig-elements/multi-column-2p.pdf.json b/test_unstructured_ingest/expected-structured-output/local-single-file-chunk-no-orig-elements/multi-column-2p.pdf.json deleted file mode 100644 index f22c0caa59..0000000000 --- a/test_unstructured_ingest/expected-structured-output/local-single-file-chunk-no-orig-elements/multi-column-2p.pdf.json +++ /dev/null @@ -1,142 +0,0 @@ -[ - { - "type": "CompositeElement", - "element_id": "06c85506db46c8d0e4f014e75146bcfc", - "text": "0 2 0 2\n\np e S 0 3\n\n] L C . s c [\n\n3 v 6 0 9 4 0 . 
4 0 0 2 : v i X r a\n\nDense Passage Retrieval for Open-Domain Question Answering\n\nVladimir Karpukhin∗, Barlas O˘guz∗, Sewon Min†, Patrick Lewis, Ledell Wu, Sergey Edunov, Danqi Chen‡, Wen-tau Yih\n\nFacebook AI\n\n†University of Washington\n\n‡Princeton University\n\n{vladk, barlaso, plewis, ledell, edunov, scottyih}@fb.com sewon@cs.washington.edu danqic@cs.princeton.edu\n\nAbstract\n\nOpen-domain question answering relies on ef- ficient passage retrieval to select candidate contexts, where traditional sparse vector space models, such as TF-IDF or BM25, are the de facto method. In this work, we show that retrieval can be practically implemented us- ing dense representations alone, where em- beddings are learned from a small number of questions and passages by a simple dual- encoder framework. When evaluated on a wide range of open-domain QA datasets, our dense retriever outperforms a strong Lucene- BM25 system greatly by 9%-19% absolute in terms of top-20 passage retrieval accuracy, and helps our end-to-end QA system establish new state-of-the-art on multiple open-domain QA benchmarks.1\n\n1", - "metadata": { - "data_source": { - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/multi-column-2p.pdf", - "permissions_data": [ - { - "mode": 33188 - } - ] - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - } - }, - { - "type": "CompositeElement", - "element_id": "3ef998ac1d905d8ff1016f96a243295c", - "text": "Introduction\n\nOpen-domain question answering (QA) (Voorhees, 1999) is a task that answers factoid questions us- ing a large collection of documents. While early QA systems are often complicated and consist of multiple components (Ferrucci (2012); Moldovan et al. 
(2003), inter alia), the advances of reading comprehension models suggest a much simplified two-stage framework: (1) a context retriever first selects a small subset of passages where some of them contain the answer to the question, and then (2) a machine reader can thoroughly exam- ine the retrieved contexts and identify the correct answer (Chen et al., 2017). Although reducing open-domain QA to machine reading is a very rea- sonable strategy, a huge performance degradation is often observed in practice2, indicating the needs of improving retrieval.\n\n∗Equal contribution 1The code and trained models have been released at\n\nhttps://github.com/facebookresearch/DPR.", - "metadata": { - "data_source": { - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/multi-column-2p.pdf", - "permissions_data": [ - { - "mode": 33188 - } - ] - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - } - }, - { - "type": "CompositeElement", - "element_id": "71b12f58c99f6097b17f4d5b6147201b", - "text": "2For instance, the exact match score on SQuAD v1.1 drops\n\nRetrieval in open-domain QA is usually imple- mented using TF-IDF or BM25 (Robertson and Zaragoza, 2009), which matches keywords effi- ciently with an inverted index and can be seen as representing the question and context in high- dimensional, sparse vectors (with weighting). Con- versely, the dense, latent semantic encoding is com- plementary to sparse representations by design. For example, synonyms or paraphrases that consist of completely different tokens may still be mapped to vectors close to each other. 
Consider the question “Who is the bad guy in lord of the rings?”, which can be answered from the context “Sala Baker is best known for portraying the villain Sauron in the Lord of the Rings trilogy.” A term-based system would have difficulty retrieving such a context, while a dense retrieval system would be able to better match “bad guy” with “villain” and fetch the cor- rect context. Dense encodings are also learnable by adjusting the embedding functions, which pro- vides additional flexibility to have a task-specific representation. With special in-memory data struc- tures and indexing schemes, retrieval can be done efficiently using maximum inner product search (MIPS) algorithms (e.g., Shrivastava and Li (2014); Guo et al. (2016)).\n\nHowever, it is generally believed that learn- ing a good dense vector representation needs a large number of labeled pairs of question and con- texts. Dense retrieval methods have thus never be shown to outperform TF-IDF/BM25 for open- domain QA before ORQA (Lee et al., 2019), which proposes a sophisticated inverse cloze task (ICT) objective, predicting the blocks that contain the masked sentence, for additional pretraining. The question encoder and the reader model are then fine- tuned using pairs of questions and answers jointly. Although ORQA successfully demonstrates that dense retrieval can outperform BM25, setting new state-of-the-art results on multiple open-domain", - "metadata": { - "data_source": { - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/multi-column-2p.pdf", - "permissions_data": [ - { - "mode": 33188 - } - ] - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - } - }, - { - "type": "CompositeElement", - "element_id": "ef458b0b4659bfd57b11fbfb571c38d1", - "text": "from above 80% to less than 40% (Yang et al., 2019a).\n\nQA datasets, it also suffers from two weaknesses. 
First, ICT pretraining is computationally intensive and it is not completely clear that regular sentences are good surrogates of questions in the objective function. Second, because the context encoder is not fine-tuned using pairs of questions and answers, the corresponding representations could be subop- timal.", - "metadata": { - "data_source": { - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/multi-column-2p.pdf", - "permissions_data": [ - { - "mode": 33188 - } - ] - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - } - }, - { - "type": "CompositeElement", - "element_id": "4204154eefaa843f79edc96dcc208054", - "text": "In this paper, we address the question: can we train a better dense embedding model using only pairs of questions and passages (or answers), with- out additional pretraining? By leveraging the now standard BERT pretrained model (Devlin et al., 2019) and a dual-encoder architecture (Bromley et al., 1994), we focus on developing the right training scheme using a relatively small number of question and passage pairs. Through a series of careful ablation studies, our final solution is surprisingly simple: the embedding is optimized for maximizing inner products of the question and relevant passage vectors, with an objective compar- ing all pairs of questions and passages in a batch. Our Dense Passage Retriever (DPR) is exception- ally strong. It not only outperforms BM25 by a large margin (65.2% vs. 42.9% in Top-5 accuracy), but also results in a substantial improvement on the end-to-end QA accuracy compared to ORQA (41.5% vs. 33.3%) in the open Natural Questions setting (Lee et al., 2019; Kwiatkowski et al., 2019). Our contributions are twofold. First, we demon- strate that with the proper training setup, sim- ply fine-tuning the question and passage encoders on existing question-passage pairs is sufficient to greatly outperform BM25. 
Our empirical results also suggest that additional pretraining may not be needed. Second, we verify that, in the context of open-domain question answering, a higher retrieval precision indeed translates to a higher end-to-end QA accuracy. By applying a modern reader model to the top retrieved passages, we achieve compara- ble or better results on multiple QA datasets in the open-retrieval setting, compared to several, much complicated systems.", - "metadata": { - "data_source": { - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/multi-column-2p.pdf", - "permissions_data": [ - { - "mode": 33188 - } - ] - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - } - }, - { - "type": "CompositeElement", - "element_id": "43198ac980a699b3b17c5f229aee8656", - "text": "2 Background\n\nThe problem of open-domain QA studied in this paper can be described as follows. Given a factoid question, such as “Who first voiced Meg on Family Guy?” or “Where was the 8th Dalai Lama born?”, a system is required to answer it using a large corpus of diversified topics. More specifically, we assume\n\nthe extractive QA setting, in which the answer is restricted to a span appearing in one or more pas- sages in the corpus. Assume that our collection contains D documents, d1, d2, · · · , dD. We first split each of the documents into text passages of equal lengths as the basic retrieval units3 and get M total passages in our corpus C = {p1, p2, . . . , pM }, where each passage pi can be viewed as a sequence 2 , · · · , w(i) 1 , w(i) of tokens w(i) |pi|. Given a question q, the task is to find a span w(i) s+1, · · · , w(i) s , w(i) from one of the passages pi that can answer the question. Notice that to cover a wide variety of domains, the corpus size can easily range from millions of docu- ments (e.g., Wikipedia) to billions (e.g., the Web). 
As a result, any open-domain QA system needs to include an efficient retriever component that can se- lect a small set of relevant texts, before applying the reader to extract the answer (Chen et al., 2017).4 Formally speaking, a retriever R : (q, C) → CF is a function that takes as input a question q and a corpus C and returns a much smaller filter set of texts CF ⊂ C, where |CF | = k (cid:28) |C|. For a fixed k, a retriever can be evaluated in isolation on top-k retrieval accuracy, which is the fraction of ques- tions for which CF contains a span that answers the question.\n\ne", - "metadata": { - "data_source": { - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/multi-column-2p.pdf", - "permissions_data": [ - { - "mode": 33188 - } - ] - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - } - }, - { - "type": "CompositeElement", - "element_id": "82cfad702e5779169139f705fd0af5ee", - "text": "3 Dense Passage Retriever (DPR)\n\nWe focus our research in this work on improv- ing the retrieval component in open-domain QA. Given a collection of M text passages, the goal of our dense passage retriever (DPR) is to index all the passages in a low-dimensional and continuous space, such that it can retrieve efficiently the top k passages relevant to the input question for the reader at run-time. Note that M can be very large (e.g., 21 million passages in our experiments, de- scribed in Section 4.1) and k is usually small, such as 20–100.\n\n3.1 Overview\n\nOur dense passage retriever (DPR) uses a dense encoder EP (·) which maps any text passage to a d- dimensional real-valued vectors and builds an index for all the M passages that we will use for retrieval.\n\n3The ideal size and boundary of a text passage are func- tions of both the retriever and reader. 
We also experimented with natural paragraphs in our preliminary trials and found that using fixed-length passages performs better in both retrieval and final QA accuracy, as observed by Wang et al. (2019).\n\n4Exceptions include (Seo et al., 2019) and (Roberts et al., 2020), which retrieves and generates the answers, respectively.", - "metadata": { - "data_source": { - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/multi-column-2p.pdf", - "permissions_data": [ - { - "mode": 33188 - } - ] - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - } - } -] \ No newline at end of file diff --git a/test_unstructured_ingest/expected-structured-output/local-single-file-with-encoding/fake-html-cp1252.html.json b/test_unstructured_ingest/expected-structured-output/local-single-file-with-encoding/fake-html-cp1252.html.json deleted file mode 100644 index 85d4483f53..0000000000 --- a/test_unstructured_ingest/expected-structured-output/local-single-file-with-encoding/fake-html-cp1252.html.json +++ /dev/null @@ -1,90 +0,0 @@ -[ - { - "element_id": "a59f117741c76dca0bc8f5ee72e2010b", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "example-docs/fake-html-cp1252.html" - }, - "filetype": "text/html", - "languages": [ - "por", - "cat", - "eng", - "vie" - ] - }, - "text": "My First Heading", - "type": "Title" - }, - { - "element_id": "82eda2671c5ead903683b67b0f8e3f29", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "example-docs/fake-html-cp1252.html" - }, - "filetype": "text/html", - "languages": [ - "por", - "cat", - "eng", - "vie" - ] - }, - "text": "My first paragraph.", - "type": "Title" - }, - { - "element_id": "9f76e487d5df3f6c4ce8ea2ece61057f", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "example-docs/fake-html-cp1252.html" - }, - "filetype": 
"text/html", - "languages": [ - "por", - "cat", - "eng", - "vie" - ] - }, - "text": "Some CP1252-specific characters:", - "type": "Title" - }, - { - "element_id": "a7394a14aa8bf2dae179420d96ac755c", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "example-docs/fake-html-cp1252.html" - }, - "filetype": "text/html", - "languages": [ - "por", - "cat", - "eng", - "vie" - ] - }, - "text": "¡\t¢\t£\t¤\t¥\t¦\t§\t¨\t©\tª\t«\t¬\tSHY\t®\t¯\n°\t±\t²\t³\t´\tµ\t¶\t·\t¸\t¹\tº\t»\t¼\t½\t¾\t¿\nÀ\tÁ\tÂ\tÃ\tÄ\tÅ\tÆ\tÇ\tÈ\tÉ\tÊ\tË\tÌ\tÍ\tÎ\tÏ\nÐ\tÑ\tÒ\tÓ\tÔ\tÕ\tÖ\t×\tØ\tÙ\tÚ\tÛ\tÜ\tÝ\tÞ\tß\nà\tá\tâ\tã\tä\tå\tæ\tç\tè\té\tê\të\tì\tí\tî\tï\nð\tñ\tò\tó\tô\tõ\tö\t÷\tø\tù\tú\tû\tü\tý\tþ\tÿ", - "type": "NarrativeText" - } -] \ No newline at end of file diff --git a/test_unstructured_ingest/expected-structured-output/local-single-file-with-encoding/fake-html-cp1252.json b/test_unstructured_ingest/expected-structured-output/local-single-file-with-encoding/fake-html-cp1252.json new file mode 100644 index 0000000000..8490fbea67 --- /dev/null +++ b/test_unstructured_ingest/expected-structured-output/local-single-file-with-encoding/fake-html-cp1252.json @@ -0,0 +1,98 @@ +[ + { + "type": "Title", + "element_id": "a59f117741c76dca0bc8f5ee72e2010b", + "text": "My First Heading", + "metadata": { + "languages": [ + "por", + "cat", + "eng", + "vie" + ], + "filetype": "text/html", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/fake-html-cp1252.html" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "82eda2671c5ead903683b67b0f8e3f29", + "text": "My first paragraph.", + "metadata": { + "languages": [ + "por", + "cat", + "eng", + "vie" + ], + "filetype": "text/html", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/fake-html-cp1252.html" + }, + "permissions_data": [ + { + "mode": 
33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "9f76e487d5df3f6c4ce8ea2ece61057f", + "text": "Some CP1252-specific characters:", + "metadata": { + "languages": [ + "por", + "cat", + "eng", + "vie" + ], + "filetype": "text/html", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/fake-html-cp1252.html" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "a7394a14aa8bf2dae179420d96ac755c", + "text": "\u00a1\t\u00a2\t\u00a3\t\u00a4\t\u00a5\t\u00a6\t\u00a7\t\u00a8\t\u00a9\t\u00aa\t\u00ab\t\u00ac\tSHY\t\u00ae\t\u00af\n\u00b0\t\u00b1\t\u00b2\t\u00b3\t\u00b4\t\u00b5\t\u00b6\t\u00b7\t\u00b8\t\u00b9\t\u00ba\t\u00bb\t\u00bc\t\u00bd\t\u00be\t\u00bf\n\u00c0\t\u00c1\t\u00c2\t\u00c3\t\u00c4\t\u00c5\t\u00c6\t\u00c7\t\u00c8\t\u00c9\t\u00ca\t\u00cb\t\u00cc\t\u00cd\t\u00ce\t\u00cf\n\u00d0\t\u00d1\t\u00d2\t\u00d3\t\u00d4\t\u00d5\t\u00d6\t\u00d7\t\u00d8\t\u00d9\t\u00da\t\u00db\t\u00dc\t\u00dd\t\u00de\t\u00df\n\u00e0\t\u00e1\t\u00e2\t\u00e3\t\u00e4\t\u00e5\t\u00e6\t\u00e7\t\u00e8\t\u00e9\t\u00ea\t\u00eb\t\u00ec\t\u00ed\t\u00ee\t\u00ef\n\u00f0\t\u00f1\t\u00f2\t\u00f3\t\u00f4\t\u00f5\t\u00f6\t\u00f7\t\u00f8\t\u00f9\t\u00fa\t\u00fb\t\u00fc\t\u00fd\t\u00fe\t\u00ff", + "metadata": { + "languages": [ + "por", + "cat", + "eng", + "vie" + ], + "filetype": "text/html", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/fake-html-cp1252.html" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + } +] \ No newline at end of file diff --git a/test_unstructured_ingest/expected-structured-output/local-single-file-with-pdf-infer-table-structure/layout-parser-paper-with-table.jpg.json b/test_unstructured_ingest/expected-structured-output/local-single-file-with-pdf-infer-table-structure/layout-parser-paper-with-table.json similarity index 62% rename from 
test_unstructured_ingest/expected-structured-output/local-single-file-with-pdf-infer-table-structure/layout-parser-paper-with-table.jpg.json rename to test_unstructured_ingest/expected-structured-output/local-single-file-with-pdf-infer-table-structure/layout-parser-paper-with-table.json index 5575a6de9b..78e2ebd6e6 100644 --- a/test_unstructured_ingest/expected-structured-output/local-single-file-with-pdf-infer-table-structure/layout-parser-paper-with-table.jpg.json +++ b/test_unstructured_ingest/expected-structured-output/local-single-file-with-pdf-infer-table-structure/layout-parser-paper-with-table.json @@ -1,473 +1,503 @@ [ { + "type": "Title", "element_id": "5d45a28d875e403c7294a15f22a0162f", + "text": "LayoutParser: A Unified Toolkit for DL-Based DIA 5", "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper-with-table.jpg" - }, "filetype": "image/jpeg", "languages": [ "eng" ], - "page_number": 1 - }, - "text": "LayoutParser: A Unified Toolkit for DL-Based DIA 5", - "type": "Title" - }, - { - "element_id": "d9d53799fbfc3f90096f9dc9d45ff667", - "metadata": { + "page_number": 1, "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper-with-table.jpg" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper-with-table.jpg" - }, + ] + } + } + }, + { + "type": "FigureCaption", + "element_id": "d9d53799fbfc3f90096f9dc9d45ff667", + "text": "Table 1: Current layout detection models in the LayoutParser model zoo", + "metadata": { "filetype": "image/jpeg", "languages": [ "eng" ], - "page_number": 1 - }, - "text": "Table 1: Current layout detection models in the LayoutParser model zoo", - "type": "FigureCaption" - }, - { - 
"element_id": "dddac446da6c93dc1449ecb5d997c423", - "metadata": { + "page_number": 1, "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper-with-table.jpg" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper-with-table.jpg" - }, - "filetype": "image/jpeg", - "languages": [ - "eng" - ], - "page_number": 1, + ] + } + } + }, + { + "type": "Table", + "element_id": "dddac446da6c93dc1449ecb5d997c423", + "text": "Dataset | Base Model\" Large Model | Notes PubLayNet [38] P/M M Layouts of modern scientific documents PRImA [3) M - Layouts of scanned modern magazines and scientific reports Newspaper [17] P - Layouts of scanned US newspapers from the 20th century \u2018TableBank (18) P P Table region on modern scientific and business document HJDataset (31) | F/M - Layouts of history Japanese documents", + "metadata": { + "text_as_html": "
Dataset| Base Model!|Large Model| Notes
PubLayNet [33]P/MMLayouts of modern scientific documents
PRImA [3]MLayouts of scanned modern magazines and scientific reports
Newspaper [17]PLayouts of scanned US newspapers from the 20th century
TableBank [18]PTable region on modern scientific and business document
HIDataset [31]P/MLayouts of history Japanese documents
", "table_as_cells": [ { - "content": "Dataset", - "h": 1, - "w": 1, "x": 0, - "y": 0 + "y": 0, + "w": 1, + "h": 1, + "content": "Dataset" }, { - "content": "PubLayNet [33]", - "h": 1, - "w": 1, "x": 0, - "y": 1 + "y": 1, + "w": 1, + "h": 1, + "content": "PubLayNet [33]" }, { - "content": "PRImA [3]", - "h": 1, - "w": 1, "x": 0, - "y": 2 + "y": 2, + "w": 1, + "h": 1, + "content": "PRImA [3]" }, { - "content": "Newspaper [17]", - "h": 1, - "w": 1, "x": 0, - "y": 3 + "y": 3, + "w": 1, + "h": 1, + "content": "Newspaper [17]" }, { - "content": "TableBank [18]", - "h": 1, - "w": 1, "x": 0, - "y": 4 + "y": 4, + "w": 1, + "h": 1, + "content": "TableBank [18]" }, { - "content": "HIDataset [31]", - "h": 1, - "w": 1, "x": 0, - "y": 5 + "y": 5, + "w": 1, + "h": 1, + "content": "HIDataset [31]" }, { - "content": "| Base Model!|", - "h": 1, - "w": 1, "x": 1, - "y": 0 + "y": 0, + "w": 1, + "h": 1, + "content": "| Base Model!|" }, { - "content": "P/M", - "h": 1, - "w": 1, "x": 1, - "y": 1 + "y": 1, + "w": 1, + "h": 1, + "content": "P/M" }, { - "content": "M", - "h": 1, - "w": 1, "x": 1, - "y": 2 + "y": 2, + "w": 1, + "h": 1, + "content": "M" }, { - "content": "P", - "h": 1, - "w": 1, "x": 1, - "y": 3 + "y": 3, + "w": 1, + "h": 1, + "content": "P" }, { - "content": "P", - "h": 1, - "w": 1, "x": 1, - "y": 4 + "y": 4, + "w": 1, + "h": 1, + "content": "P" }, { - "content": "P/M", - "h": 1, - "w": 1, "x": 1, - "y": 5 + "y": 5, + "w": 1, + "h": 1, + "content": "P/M" }, { - "content": "Large Model", - "h": 1, - "w": 1, "x": 2, - "y": 0 + "y": 0, + "w": 1, + "h": 1, + "content": "Large Model" }, { - "content": "M", - "h": 1, - "w": 1, "x": 2, - "y": 1 + "y": 1, + "w": 1, + "h": 1, + "content": "M" }, { - "content": "", - "h": 1, - "w": 1, "x": 2, - "y": 2 + "y": 2, + "w": 1, + "h": 1, + "content": "" }, { - "content": "", - "h": 1, - "w": 1, "x": 2, - "y": 3 + "y": 3, + "w": 1, + "h": 1, + "content": "" }, { - "content": "", - "h": 1, - "w": 1, "x": 2, - "y": 4 + "y": 4, + "w": 1, + "h": 
1, + "content": "" }, { - "content": "", - "h": 1, - "w": 1, "x": 2, - "y": 5 + "y": 5, + "w": 1, + "h": 1, + "content": "" }, { - "content": "| Notes", - "h": 1, - "w": 1, "x": 3, - "y": 0 + "y": 0, + "w": 1, + "h": 1, + "content": "| Notes" }, { - "content": "Layouts of modern scientific documents", - "h": 1, - "w": 1, "x": 3, - "y": 1 + "y": 1, + "w": 1, + "h": 1, + "content": "Layouts of modern scientific documents" }, { - "content": "Layouts of scanned modern magazines and scientific reports", - "h": 1, - "w": 1, "x": 3, - "y": 2 + "y": 2, + "w": 1, + "h": 1, + "content": "Layouts of scanned modern magazines and scientific reports" }, { - "content": "Layouts of scanned US newspapers from the 20th century", - "h": 1, - "w": 1, "x": 3, - "y": 3 + "y": 3, + "w": 1, + "h": 1, + "content": "Layouts of scanned US newspapers from the 20th century" }, { - "content": "Table region on modern scientific and business document", - "h": 1, - "w": 1, "x": 3, - "y": 4 + "y": 4, + "w": 1, + "h": 1, + "content": "Table region on modern scientific and business document" }, { - "content": "Layouts of history Japanese documents", - "h": 1, - "w": 1, "x": 3, - "y": 5 + "y": 5, + "w": 1, + "h": 1, + "content": "Layouts of history Japanese documents" } ], - "text_as_html": "
Dataset| Base Model!|Large Model| Notes
PubLayNet [33]P/MMLayouts of modern scientific documents
PRImA [3]MLayouts of scanned modern magazines and scientific reports
Newspaper [17]PLayouts of scanned US newspapers from the 20th century
TableBank [18]PTable region on modern scientific and business document
HIDataset [31]P/MLayouts of history Japanese documents
" - }, - "text": "Dataset | Base Model\" Large Model | Notes PubLayNet [38] P/M M Layouts of modern scientific documents PRImA [3) M - Layouts of scanned modern magazines and scientific reports Newspaper [17] P - Layouts of scanned US newspapers from the 20th century ‘TableBank (18) P P Table region on modern scientific and business document HJDataset (31) | F/M - Layouts of history Japanese documents", - "type": "Table" - }, - { - "element_id": "e5314387378c7a98911d71c145c45327", - "metadata": { + "filetype": "image/jpeg", + "languages": [ + "eng" + ], + "page_number": 1, "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper-with-table.jpg" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper-with-table.jpg" - }, + ] + } + } + }, + { + "type": "UncategorizedText", + "element_id": "e5314387378c7a98911d71c145c45327", + "text": "2", + "metadata": { "filetype": "image/jpeg", "languages": [ "eng" ], - "page_number": 1 - }, - "text": "2", - "type": "UncategorizedText" - }, - { - "element_id": "e262996994d01c45f0d6ef28cb8afa93", - "metadata": { + "page_number": 1, "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper-with-table.jpg" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper-with-table.jpg" - }, + ] + } + } + }, + { + "type": "FigureCaption", + "element_id": "e262996994d01c45f0d6ef28cb8afa93", + "text": "For each dataset, we train several models of different sizes for different needs (the trade-off between accuracy vs. computational cost). 
For \u201cbase model\u201d and \u201clarge model\u201d, we refer to using the ResNet 50 or ResNet 101 backbones [13], respectively. One can train models of different architectures, like Faster R-CNN [28] (P) and Mask R-CNN [12] (M). For example, an F in the Large Model column indicates it has m Faster R-CNN model trained using the ResNet 101 backbone. The platform is maintained and a number of additions will be made to the model zoo in coming months.", + "metadata": { "filetype": "image/jpeg", "languages": [ "eng" ], - "page_number": 1 - }, - "text": "For each dataset, we train several models of different sizes for different needs (the trade-off between accuracy vs. computational cost). For “base model” and “large model”, we refer to using the ResNet 50 or ResNet 101 backbones [13], respectively. One can train models of different architectures, like Faster R-CNN [28] (P) and Mask R-CNN [12] (M). For example, an F in the Large Model column indicates it has m Faster R-CNN model trained using the ResNet 101 backbone. The platform is maintained and a number of additions will be made to the model zoo in coming months.", - "type": "FigureCaption" - }, - { - "element_id": "2298258fe84201e839939d70c168141b", - "metadata": { + "page_number": 1, "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper-with-table.jpg" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper-with-table.jpg" - }, + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "2298258fe84201e839939d70c168141b", + "text": "layout data structures, which are optimized for efficiency and versatility. 3) When necessary, users can employ existing or customized OCR models via the unified API provided in the OCR module. 
4) LayoutParser comes with a set of utility functions for the visualization and stomge of the layout data. 5) LayoutParser is also highly customizable, via its integration with functions for layout data annotation and model training. We now provide detailed descriptions for each component.", + "metadata": { "filetype": "image/jpeg", "languages": [ "eng" ], - "page_number": 1 - }, - "text": "layout data structures, which are optimized for efficiency and versatility. 3) When necessary, users can employ existing or customized OCR models via the unified API provided in the OCR module. 4) LayoutParser comes with a set of utility functions for the visualization and stomge of the layout data. 5) LayoutParser is also highly customizable, via its integration with functions for layout data annotation and model training. We now provide detailed descriptions for each component.", - "type": "NarrativeText" - }, - { - "element_id": "24d2473c4975fedd3f5cfd3026249837", - "metadata": { + "page_number": 1, "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper-with-table.jpg" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper-with-table.jpg" - }, + ] + } + } + }, + { + "type": "Title", + "element_id": "24d2473c4975fedd3f5cfd3026249837", + "text": "3.1 Layout Detection Models", + "metadata": { "filetype": "image/jpeg", "languages": [ "eng" ], - "page_number": 1 - }, - "text": "3.1 Layout Detection Models", - "type": "Title" - }, - { - "element_id": "008c0a590378dccd98ae7a5c49905eda", - "metadata": { + "page_number": 1, "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper-with-table.jpg" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper-with-table.jpg" - }, + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "008c0a590378dccd98ae7a5c49905eda", + "text": "In LayoutParser, a layout model takes a document image as an input and generates a list of rectangular boxes for the target content regions. Different from traditional methods, it relies on deep convolutional neural networks rather than manually curated rules to identify content regions. It is formulated as an object detection problem and state-of-the-art models like Faster R-CNN [28] and Mask R-CNN [12] are used. This yields prediction results of high accuracy and makes it possible to build a concise, generalized interface for layout detection. LayoutParser, built upon Detectron2 [35], provides a minimal API that can perform layout detection with only four lines of code in Python:", + "metadata": { "filetype": "image/jpeg", "languages": [ "eng" ], - "page_number": 1 - }, - "text": "In LayoutParser, a layout model takes a document image as an input and generates a list of rectangular boxes for the target content regions. Different from traditional methods, it relies on deep convolutional neural networks rather than manually curated rules to identify content regions. It is formulated as an object detection problem and state-of-the-art models like Faster R-CNN [28] and Mask R-CNN [12] are used. This yields prediction results of high accuracy and makes it possible to build a concise, generalized interface for layout detection. 
LayoutParser, built upon Detectron2 [35], provides a minimal API that can perform layout detection with only four lines of code in Python:", - "type": "NarrativeText" - }, - { - "element_id": "b98aac79b1c1af144f6ed563e6510fd4", - "metadata": { + "page_number": 1, "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper-with-table.jpg" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper-with-table.jpg" - }, + ] + } + } + }, + { + "type": "ListItem", + "element_id": "b98aac79b1c1af144f6ed563e6510fd4", + "text": "import layoutparser as lp", + "metadata": { "filetype": "image/jpeg", "languages": [ "eng" ], - "page_number": 1 - }, - "text": "import layoutparser as lp", - "type": "ListItem" - }, - { - "element_id": "44691a14713d40ea25a0401490ed7b5e", - "metadata": { + "page_number": 1, "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper-with-table.jpg" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper-with-table.jpg" - }, + ] + } + } + }, + { + "type": "Title", + "element_id": "44691a14713d40ea25a0401490ed7b5e", + "text": "wwe", + "metadata": { "filetype": "image/jpeg", "languages": [ "eng" ], - "page_number": 1 - }, - "text": "wwe", - "type": "Title" - }, - { - "element_id": "e14922762abe8a044371efcab13bdcc9", - "metadata": { + "page_number": 1, "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper-with-table.jpg" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper-with-table.jpg" - }, + ] + } + } + }, + { + "type": "ListItem", + "element_id": "e14922762abe8a044371efcab13bdcc9", + "text": "image = cv2.imread(\"image_file\") # load images", + "metadata": { "filetype": "image/jpeg", "languages": [ "eng" ], - "page_number": 1 - }, - "text": "image = cv2.imread(\"image_file\") # load images", - "type": "ListItem" - }, - { - "element_id": "986e6a00c43302413ca0ad4badd5bca8", - "metadata": { + "page_number": 1, "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper-with-table.jpg" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper-with-table.jpg" - }, + ] + } + } + }, + { + "type": "ListItem", + "element_id": "986e6a00c43302413ca0ad4badd5bca8", + "text": "model = lp. Detectron2LayoutModel (", + "metadata": { "filetype": "image/jpeg", "languages": [ "eng" ], - "page_number": 1 - }, - "text": "model = lp. 
Detectron2LayoutModel (", - "type": "ListItem" - }, - { - "element_id": "d50233678a0d15373eb47ab537d3c11e", - "metadata": { + "page_number": 1, "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper-with-table.jpg" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper-with-table.jpg" - }, + ] + } + } + }, + { + "type": "ListItem", + "element_id": "d50233678a0d15373eb47ab537d3c11e", + "text": "ea \"lp: //PubLayNet/faster_rcnn_R_50_FPN_3x/config\")", + "metadata": { "filetype": "image/jpeg", "languages": [ "eng" ], - "page_number": 1 - }, - "text": "ea \"lp: //PubLayNet/faster_rcnn_R_50_FPN_3x/config\")", - "type": "ListItem" - }, - { - "element_id": "11dccdd53ee27c94e976b875d2d6e40d", - "metadata": { + "page_number": 1, "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper-with-table.jpg" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper-with-table.jpg" - }, + ] + } + } + }, + { + "type": "ListItem", + "element_id": "11dccdd53ee27c94e976b875d2d6e40d", + "text": "layout = model.detect (image)", + "metadata": { "filetype": "image/jpeg", "languages": [ "eng" ], - "page_number": 1 - }, - "text": "layout = model.detect (image)", - "type": "ListItem" - }, - { - "element_id": "bb86a9374cb6126db4088d1092557d09", - "metadata": { + "page_number": 1, "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper-with-table.jpg" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper-with-table.jpg" - }, + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "bb86a9374cb6126db4088d1092557d09", + "text": "LayoutParser provides a wealth of pre-trained model weights using various datasets covering different languages, time periods, and document types. Due to domain shift [7], the prediction performance can notably drop when models are ap- plied to target samples that are significantly different from the training dataset. As document structures and layouts vary greatly in different domains, it is important to select models trained on a dataset similar to the test samples. A semantic syntax is used for initializing the model weights in Layout Parser, using both the dataset name and model name 1p:///.", + "metadata": { "filetype": "image/jpeg", "languages": [ "eng" ], - "page_number": 1 - }, - "text": "LayoutParser provides a wealth of pre-trained model weights using various datasets covering different languages, time periods, and document types. Due to domain shift [7], the prediction performance can notably drop when models are ap- plied to target samples that are significantly different from the training dataset. As document structures and layouts vary greatly in different domains, it is important to select models trained on a dataset similar to the test samples. 
A semantic syntax is used for initializing the model weights in Layout Parser, using both the dataset name and model name 1p:///.", - "type": "NarrativeText" + "page_number": 1, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper-with-table.jpg" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } } ] \ No newline at end of file diff --git a/test_unstructured_ingest/expected-structured-output/local-single-file-with-pdf-infer-table-structure/layout-parser-paper.json b/test_unstructured_ingest/expected-structured-output/local-single-file-with-pdf-infer-table-structure/layout-parser-paper.json new file mode 100644 index 0000000000..e7dc78f839 --- /dev/null +++ b/test_unstructured_ingest/expected-structured-output/local-single-file-with-pdf-infer-table-structure/layout-parser-paper.json @@ -0,0 +1,3236 @@ +[ + { + "type": "UncategorizedText", + "element_id": "d3ce55f220dfb75891b4394a18bcb973", + "text": "1 2 0 2", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 1, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Header", + "element_id": "d8294655784148f3059eb08db918977c", + "text": "n u J 1 2 ] V C . s c [", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 1, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "UncategorizedText", + "element_id": "c0cdc594eccc53cfb75eeef0ad75b65b", + "text": "2 v 8 4 3 5 1 . 
3 0 1 2 : v i X r a", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 1, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "4467e9baee9456824c1aa679526f6979", + "text": "LayoutParser: A Uni\ufb01ed Toolkit for Deep Learning Based Document Image Analysis", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 1, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "33dff5d4db499a435f61220a890d3f04", + "text": "Zejiang Shen! (4), Ruochen Zhang\u201d, Melissa Dell?, Benjamin Charles Germain Lee*, Jacob Carlson\u2019, and Weining Li>", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 1, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "a0bbb18d9710661eb9e2aa6e651e6555", + "text": "1 Allen Institute for AI shannons@allenai.org 2 Brown University ruochen zhang@brown.edu 3 Harvard University {melissadell,jacob carlson}@fas.harvard.edu 4 University of Washington bcgl@cs.washington.edu 5 University of Waterloo w422li@uwaterloo.ca", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 1, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" 
+ }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "ea0cc468a56f4af65b968fb86913bdfe", + "text": "Abstract. Recent advances in document image analysis (DIA) have been primarily driven by the application of neural networks. Ideally, research outcomes could be easily deployed in production and extended for further investigation. However, various factors like loosely organized codebases and sophisticated model con\ufb01gurations complicate the easy reuse of im- portant innovations by a wide audience. Though there have been on-going e\ufb00orts to improve reusability and simplify deep learning (DL) model development in disciplines like natural language processing and computer vision, none of them are optimized for challenges in the domain of DIA. This represents a major gap in the existing toolkit, as DIA is central to academic research across a wide range of disciplines in the social sciences and humanities. This paper introduces LayoutParser, an open-source library for streamlining the usage of DL in DIA research and applica- tions. The core LayoutParser library comes with a set of simple and intuitive interfaces for applying and customizing DL models for layout de- tection, character recognition, and many other document processing tasks. To promote extensibility, LayoutParser also incorporates a community platform for sharing both pre-trained models and full document digiti- zation pipelines. We demonstrate that LayoutParser is helpful for both lightweight and large-scale digitization pipelines in real-word use cases. 
The library is publicly available at https://layout-parser.github.io.", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 1, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "370e1b61d1dab8ae35d62eb6f42feceb", + "text": "Keywords: Document Image Analysis \u00b7 Deep Learning \u00b7 Layout Analysis \u00b7 Character Recognition \u00b7 Open Source library \u00b7 Toolkit.", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 1, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "f12febfe29a59a8e4ce6b3494d6deb8a", + "text": "Introduction", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 1, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "fd81374ba214b5472d0b60b2371ae8df", + "text": "Deep Learning(DL)-based approaches are the state-of-the-art for a wide range of document image analysis (DIA) tasks including document image classi\ufb01cation [11,", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 1, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + 
"type": "Title", + "element_id": "e7e0acf197e89d650d39fa3ce929509e", + "text": "2 Z. Shen et al.", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 2, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "4b097cc42d7d30e720512dbce0cb4905", + "text": "37], layout detection [38, 22], table detection [26], and scene text detection [4]. A generalized learning-based framework dramatically reduces the need for the manual speci\ufb01cation of complicated rules, which is the status quo with traditional methods. DL has the potential to transform DIA pipelines and bene\ufb01t a broad spectrum of large-scale document digitization projects.", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 2, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "45844a4901777afaf6de9a0994e017eb", + "text": "However, there are several practical di\ufb03culties for taking advantages of re- cent advances in DL-based methods: 1) DL models are notoriously convoluted for reuse and extension. Existing models are developed using distinct frame- works like TensorFlow [1] or PyTorch [24], and the high-level parameters can be obfuscated by implementation details [8]. It can be a time-consuming and frustrating experience to debug, reproduce, and adapt existing models for DIA, and many researchers who would bene\ufb01t the most from using these methods lack the technical background to implement them from scratch. 
2) Document images contain diverse and disparate patterns across domains, and customized training is often required to achieve a desirable detection accuracy. Currently there is no full-\ufb02edged infrastructure for easily curating the target document image datasets and \ufb01ne-tuning or re-training the models. 3) DIA usually requires a sequence of models and other processing to obtain the \ufb01nal outputs. Often research teams use DL models and then perform further document analyses in separate processes, and these pipelines are not documented in any central location (and often not documented at all). This makes it di\ufb03cult for research teams to learn about how full pipelines are implemented and leads them to invest signi\ufb01cant resources in reinventing the DIA wheel.", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 2, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "6f3c8d55dd5a4f95d8a59d146ca9ffa7", + "text": "LayoutParser provides a uni\ufb01ed toolkit to support DL-based document image analysis and processing. To address the aforementioned challenges, LayoutParser is built with the following components:", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 2, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "ListItem", + "element_id": "9ce12a49c1a9972b4cd2c3f66595b2b6", + "text": "1. 
An o\ufb00-the-shelf toolkit for applying DL models for layout detection, character recognition, and other DIA tasks (Section 3)", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 2, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "ListItem", + "element_id": "40f42a96bdd1559e09d74090c0fe9df3", + "text": "2. A rich repository of pre-trained neural network models (Model Zoo) that underlies the o\ufb00-the-shelf usage", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 2, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "ListItem", + "element_id": "0ca448d3ae0c4ee73bf46e8edfcd417d", + "text": "3. Comprehensive tools for e\ufb03cient document image data annotation and model tuning to support di\ufb00erent levels of customization", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 2, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "ListItem", + "element_id": "7a9de9b00d51bd670feccc2eb84a147e", + "text": "4. 
A DL model hub and community platform for the easy sharing, distribu- tion, and discussion of DIA models and pipelines, to promote reusability, reproducibility, and extensibility (Section 4)", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 2, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "8e216e91ff3471241858f1df445cdf0a", + "text": "The library implements simple and intuitive Python APIs without sacri\ufb01cing generalizability and versatility, and can be easily installed via pip. Its convenient functions for handling document image data can be seamlessly integrated with existing DIA pipelines. With detailed documentations and carefully curated tutorials, we hope this tool will bene\ufb01t a variety of end-users, and will lead to advances in applications in both industry and academic research.", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 2, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "583775f22c8080098beebbef960e2fbf", + "text": "LayoutParser is well aligned with recent e\ufb00orts for improving DL model reusability in other disciplines like natural language processing [8, 34] and com- puter vision [35], but with a focus on unique challenges in DIA. 
We show LayoutParser can be applied in sophisticated and large-scale digitization projects", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 2, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Header", + "element_id": "f5a6697190c20bf6030d8e4ae8f6861a", + "text": "LayoutParser: A Uni\ufb01ed Toolkit for DL-Based DIA", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 3, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "50846086f4d9ece02052735686278699", + "text": "that require precision, e\ufb03ciency, and robustness, as well as simple and light- weight document processing tasks focusing on e\ufb03cacy and \ufb02exibility (Section 5). LayoutParser is being actively maintained, and support for more deep learning models and novel methods in text-based layout analysis methods [37, 34] is planned.", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 3, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "0ce686208eb4aba70d0cd053d50c7bc2", + "text": "The rest of the paper is organized as follows. Section 2 provides an overview of related work. 
The core LayoutParser library, DL Model Zoo, and customized model training are described in Section 3, and the DL model hub and commu- nity platform are detailed in Section 4. Section 5 shows two examples of how LayoutParser can be used in practical DIA projects, and Section 6 concludes.", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 3, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "1548efaaa18cf819f9498d76a0440316", + "text": "2 Related Work", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 3, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "8153390c1bb8652313be64034531449e", + "text": "Recently, various DL models and datasets have been developed for layout analysis tasks. The dhSegment [22] utilizes fully convolutional networks [20] for segmen- tation tasks on historical documents. Object detection-based methods like Faster R-CNN [28] and Mask R-CNN [12] are used for identifying document elements [38] and detecting tables [30, 26]. Most recently, Graph Neural Networks [29] have also been used in table detection [27]. 
However, these models are usually implemented individually and there is no uni\ufb01ed framework to load and use such models.", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 3, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "45d6d494603e84706884918c1f785c9f", + "text": "There has been a surge of interest in creating open-source tools for document image processing: a search of document image analysis in Github leads to 5M relevant code pieces 6; yet most of them rely on traditional rule-based methods or provide limited functionalities. The closest prior research to our work is the OCR-D project7, which also tries to build a complete toolkit for DIA. However, similar to the platform developed by Neudecker et al. [21], it is designed for analyzing historical documents, and provides no supports for recent DL models. The DocumentLayoutAnalysis project8 focuses on processing born-digital PDF documents via analyzing the stored PDF data. Repositories like DeepLayout9 and Detectron2-PubLayNet10 are individual deep learning models trained on layout analysis datasets without support for the full DIA pipeline. The Document Analysis and Exploitation (DAE) platform [15] and the DeepDIVA project [2] aim to improve the reproducibility of DIA methods (or DL models), yet they are not actively maintained. 
OCR engines like Tesseract [14], easyOCR11 and paddleOCR12 usually do not come with comprehensive functionalities for other DIA tasks like layout analysis.", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 3, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "73feaff827cbc7089d3f95d1e5aac6aa", + "text": "Recent years have also seen numerous e\ufb00orts to create libraries for promoting reproducibility and reusability in the \ufb01eld of DL. Libraries like Dectectron2 [35],", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 3, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "e55d055f6205d93e21c673d749264e7a", + "text": "6 The number shown is obtained by specifying the search type as \u2018code\u2019. 
7 https://ocr-d.de/en/about 8 https://github.com/BobLd/DocumentLayoutAnalysis 9 https://github.com/leonlulu/DeepLayout 10 https://github.com/hpanwar08/detectron2 11 https://github.com/JaidedAI/EasyOCR 12 https://github.com/PaddlePaddle/PaddleOCR", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 3, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "UncategorizedText", + "element_id": "fe238f610fe610b8ce1abaa08a0e3e63", + "text": "4", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 4, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "92c4289ad4af7c0793e40d5662707e0a", + "text": "Z. Shen et al.", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 4, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Image", + "element_id": "642416e5d6c99219b16dbba6f72392c5", + "text": "Efficient Data Annotation Model Customization Document Images Community Platform \u2018a >) \u00a5 DIA Model Hub i .) 
Customized Model Training] == | Layout Detection Models | \u2014\u2014= DIA Pipeline Sharing ~ OCR Module = { Layout Data stuctue ) = (storage Visualization VY", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 4, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "2172d9b276cd7a485dea4978805815d8", + "text": "Fig. 1: The overall architecture of LayoutParser. For an input document image, the core LayoutParser library provides a set of o\ufb00-the-shelf tools for layout detection, OCR, visualization, and storage, backed by a carefully designed layout data structure. LayoutParser also supports high level customization via e\ufb03cient layout annotation and model training functions. These improve model accuracy on the target samples. The community platform enables the easy sharing of DIA models and whole digitization pipelines to promote reusability and reproducibility. A collection of detailed documentation, tutorials and exemplar projects make LayoutParser easy to learn and use.", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 4, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "b4948db85ca791e99aa92589fc41734f", + "text": "AllenNLP [8] and transformers [34] have provided the community with complete DL-based support for developing and deploying models for general computer vision and natural language processing problems. LayoutParser, on the other hand, specializes speci\ufb01cally in DIA tasks. 
LayoutParser is also equipped with a community platform inspired by established model hubs such as Torch Hub [23] and TensorFlow Hub [1]. It enables the sharing of pretrained models as well as full document processing pipelines that are unique to DIA tasks.", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 4, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "7651db80014a85ab253367d3bd3e4f88", + "text": "There have been a variety of document data collections to facilitate the development of DL models. Some examples include PRImA [3](magazine layouts), PubLayNet [38](academic paper layouts), Table Bank [18](tables in academic papers), Newspaper Navigator Dataset [16, 17](newspaper \ufb01gure layouts) and HJDataset [31](historical Japanese document layouts). 
A spectrum of models trained on these datasets are currently available in the LayoutParser model zoo to support di\ufb00erent use cases.", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 4, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "5a1838a8f40b4523094652cf14ab974c", + "text": "3 The Core LayoutParser Library", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 4, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "47e45d28d96fc14ddc709835de35ece5", + "text": "At the core of LayoutParser is an o\ufb00-the-shelf toolkit that streamlines DL- based document image analysis. Five components support a simple interface with comprehensive functionalities: 1) The layout detection models enable using pre-trained or self-trained DL models for layout detection with just four lines of code. 
2) The detected layout information is stored in carefully engineered", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 4, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "ListItem", + "element_id": "cd1112d2b15a0d27a29b1c83b2afd0dd", + "text": "LayoutParser: A Uni\ufb01ed Toolkit for DL-Based DIA", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 5, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "FigureCaption", + "element_id": "0b9956dc7ccd1d758263217beda63196", + "text": "Table 1: Current layout detection models in the LayoutParser model zoo", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 5, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Table", + "element_id": "2a62c55be8401908c18140e858ec3345", + "text": "Dataset Base Model1 Large Model Notes PubLayNet [38] PRImA [3] Newspaper [17] TableBank [18] HJDataset [31] F / M M F F F / M M - - F - Layouts of modern scienti\ufb01c documents Layouts of scanned modern magazines and scienti\ufb01c reports Layouts of scanned US newspapers from the 20th century Table region on modern scienti\ufb01c and business document Layouts of history Japanese documents", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 5, + "data_source": { + "record_locator": { + "path": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "FigureCaption", + "element_id": "f978160527177fa39c13774ec8dfa9cb", + "text": "1 For each dataset, we train several models of di\ufb00erent sizes for di\ufb00erent needs (the trade-o\ufb00 between accuracy vs. computational cost). For \u201cbase model\u201d and \u201clarge model\u201d, we refer to using the ResNet 50 or ResNet 101 backbones [13], respectively. One can train models of di\ufb00erent architectures, like Faster R-CNN [28] (F) and Mask R-CNN [12] (M). For example, an F in the Large Model column indicates it has a Faster R-CNN model trained using the ResNet 101 backbone. The platform is maintained and a number of additions will be made to the model zoo in coming months.", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 5, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "55b33df7609960c3552a0b7bc1a5a9c6", + "text": "layout data structures, which are optimized for e\ufb03ciency and versatility. 3) When necessary, users can employ existing or customized OCR models via the uni\ufb01ed API provided in the OCR module. 4) LayoutParser comes with a set of utility functions for the visualization and storage of the layout data. 5) LayoutParser is also highly customizable, via its integration with functions for layout data annotation and model training. 
We now provide detailed descriptions for each component.", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 5, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "6e9df774416cc71548308e324b4bdbb7", + "text": "3.1 Layout Detection Models", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 5, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "bbcc10c2b92de0cbdce8629f18b0d7ad", + "text": "In LayoutParser, a layout model takes a document image as an input and generates a list of rectangular boxes for the target content regions. Di\ufb00erent from traditional methods, it relies on deep convolutional neural networks rather than manually curated rules to identify content regions. It is formulated as an object detection problem and state-of-the-art models like Faster R-CNN [28] and Mask R-CNN [12] are used. This yields prediction results of high accuracy and makes it possible to build a concise, generalized interface for layout detection. 
LayoutParser, built upon Detectron2 [35], provides a minimal API that can perform layout detection with only four lines of code in Python:", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 5, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "a25137fdc995e079684174269dc0effa", + "text": "1 import layoutparser as lp 2 image = cv2 . imread ( \" image_file \" ) # load images 3 model = lp . De t e c tro n2 Lay outM odel ( \" lp :// PubLayNet / f as t er _ r c nn _ R _ 50 _ F P N_ 3 x / config \" ) 4 5 layout = model . detect ( image )", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 5, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "59171bb0b4a32c9ec1b0e1d327ddb88f", + "text": "LayoutParser provides a wealth of pre-trained model weights using various datasets covering di\ufb00erent languages, time periods, and document types. Due to domain shift [7], the prediction performance can notably drop when models are ap- plied to target samples that are signi\ufb01cantly di\ufb00erent from the training dataset. As document structures and layouts vary greatly in di\ufb00erent domains, it is important to select models trained on a dataset similar to the test samples. 
A semantic syntax is used for initializing the model weights in LayoutParser, using both the dataset name and model name lp:///.", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 5, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "UncategorizedText", + "element_id": "676118b62c2261113a23a610c2ac50cb", + "text": "6", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 6, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "710ac103981c6363195774b02ee582d4", + "text": "Z. Shen et al.", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 6, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Image", + "element_id": "6eb2bb6ca50b3be177565f9ff546bce8", + "text": "- \u00b0 . 
3 a a 4 a 3 oo er \u2018 2 \u00a7 8 a 8 3 3 \u2018 \u00a3 4 A g a 9 \u2018 3 \u00a5 Coordinate g 4 5 3 + \u00a7 3 H Extra Features [O=\") [Bo] eaing i Text | | Type | | ower \u00b0 & a \u00a2 o [ coordinatel textblock1, 3 3 \u2019 g Q 3 , textblock2 , layoutl ] 4 q \u00ae A list of the layout elements Ff", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 6, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "FigureCaption", + "element_id": "c2a2a4a054151d16820f38e115ce7a72", + "text": "Fig. 2: The relationship between the three types of layout data structures. Coordinate supports three kinds of variation; TextBlock consists of the co- ordinate information and extra features like block text, types, and reading orders; a Layout object is a list of all possible layout elements, including other Layout objects. They all support the same set of transformation and operation APIs for maximum \ufb02exibility.", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 6, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "d997f63fd79c7e03050ca01b58dfdf0a", + "text": "Shown in Table 1, LayoutParser currently hosts 9 pre-trained models trained on 5 di\ufb00erent datasets. Description of the training dataset is provided alongside with the trained models such that users can quickly identify the most suitable models for their tasks. 
Additionally, when such a model is not readily available, LayoutParser also supports training customized layout models and community sharing of the models (detailed in Section 3.5).", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 6, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "836e9227ef393d8b00369e6300fbba4c", + "text": "3.2 Layout Data Structures", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 6, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "601f7d95172984c75de081023ca64c15", + "text": "A critical feature of LayoutParser is the implementation of a series of data structures and operations that can be used to e\ufb03ciently process and manipulate the layout elements. In document image analysis pipelines, various post-processing on the layout analysis model outputs is usually required to obtain the \ufb01nal outputs. Traditionally, this requires exporting DL model outputs and then loading the results into other pipelines. All model outputs from LayoutParser will be stored in carefully engineered data types optimized for further processing, which makes it possible to build an end-to-end document digitization pipeline within LayoutParser. There are three key components in the data structure, namely the Coordinate system, the TextBlock, and the Layout. 
They provide di\ufb00erent levels of abstraction for the layout data, and a set of APIs are supported for transformations or operations on these classes.", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 6, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "ListItem", + "element_id": "48d58ed9a3d95637df68c8b810147ba1", + "text": "LayoutParser: A Uni\ufb01ed Toolkit for DL-Based DIA", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 7, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "dcdc0dc4759bd20c04026973cbe386e2", + "text": "Coordinates are the cornerstones for storing layout information. Currently, three types of Coordinate data structures are provided in LayoutParser, shown in Figure 2. Interval and Rectangle are the most common data types and support specifying 1D or 2D regions within a document. They are parameterized with 2 and 4 parameters. A Quadrilateral class is also implemented to support a more generalized representation of rectangular regions when the document is skewed or distorted, where the 4 corner points can be speci\ufb01ed and a total of 8 degrees of freedom are supported. A wide collection of transformations like shift, pad, and scale, and operations like intersect, union, and is_in, are supported for these classes. Notably, it is common to separate a segment of the image and analyze it individually. 
LayoutParser provides full support for this scenario via image cropping operations crop_image and coordinate transformations like relative_to and condition_on that transform coordinates to and from their relative representations. We refer readers to Table 2 for a more detailed description of these operations13.", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 7, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "3f620e1ad95cd446170613ed9d780853", + "text": "Based on Coordinates, we implement the TextBlock class that stores both the positional and extra features of individual layout elements. It also supports specifying the reading orders via setting the parent \ufb01eld to the index of the parent object. A Layout class is built that takes in a list of TextBlocks and supports processing the elements in batch. Layout can also be nested to support hierarchical layout structures. 
They support the same operations and transformations as the Coordinate classes, minimizing both learning and deployment e\ufb00ort.", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 7, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "2b81bd7a3f21b84379bfcd4bb175c5d1", + "text": "3.3 OCR", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 7, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "16565416942e53cf65f75a8a845df211", + "text": "LayoutParser provides a uni\ufb01ed interface for existing OCR tools. Though there are many OCR tools available, they are usually con\ufb01gured di\ufb00erently with distinct APIs or protocols for using them. It can be ine\ufb03cient to add new OCR tools into an existing pipeline, and di\ufb03cult to make direct comparisons among the available tools to \ufb01nd the best option for a particular project. To this end, LayoutParser builds a series of wrappers among existing OCR engines, and provides nearly the same syntax for using them. 
It supports a plug-and-play style of using OCR engines, making it e\ufb00ortless to switch, evaluate, and compare di\ufb00erent OCR modules:", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 7, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "373a9a67f855ba5b79bdc1393d2f1ce9", + "text": "1 ocr_agent = lp . TesseractAgent () 2 # Can be easily switched to other OCR software 3 tokens = ocr_agent . detect ( image )", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 7, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "5bc3c9470dc53c60c1fd04828105afdd", + "text": "The OCR outputs will also be stored in the aforementioned layout data structures and can be seamlessly incorporated into the digitization pipeline. Currently LayoutParser supports the Tesseract and Google Cloud Vision OCR engines.", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 7, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "fa023ccf2ac1042ef254ecf47cc592ca", + "text": "LayoutParser also comes with a DL-based CNN-RNN OCR model [6] trained with the Connectionist Temporal Classi\ufb01cation (CTC) loss [10]. 
It can be used like the other OCR modules, and can be easily trained on customized datasets.", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 7, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "a2a0a2ef0279f0710f3cd34474ca8645", + "text": "13 This is also available in the LayoutParser documentation pages.", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 7, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "ListItem", + "element_id": "5498a550b5367fa8dc935013956d09fa", + "text": "8 Z. Shen et al.", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 8, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "a5ce184b53898a543bca90a5b0acd156", + "text": "Table 2: All operations supported by the layout elements. 
The same APIs are supported across di\ufb00erent layout element classes including Coordinate types, TextBlock and Layout.", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 8, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Table", + "element_id": "64bc79d1132a89c71837f420d6e4e2dc", + "text": "Operation Name Description block.pad(top, bottom, right, left) Enlarge the current block according to the input block.scale(fx, fy) Scale the current block given the ratio in x and y direction block.shift(dx, dy) Move the current block with the shift distances in x and y direction block1.is in(block2) Whether block1 is inside of block2 block1.intersect(block2) Return the intersection region of block1 and block2. Coordinate type to be determined based on the inputs. block1.union(block2) Return the union region of block1 and block2. Coordinate type to be determined based on the inputs. 
block1.relative to(block2) Convert the absolute coordinates of block1 to relative coordinates to block2 block1.condition on(block2) Calculate the absolute coordinates of block1 given the canvas block2\u2019s absolute coordinates block.crop image(image) Obtain the image segments in the block region", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 8, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "3d803300353e1055611e38b1b2eb19a7", + "text": "3.4 Storage and visualization", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 8, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "97ec9aa9ca6081a954acf13151c9239e", + "text": "The end goal of DIA is to transform the image-based document data into a structured database. LayoutParser supports exporting layout data into di\ufb00erent formats like JSON, csv, and will add the support for the METS/ALTO XML format 14 . It can also load datasets from layout analysis-speci\ufb01c formats like COCO [38] and the Page Format [25] for training layout models (Section 3.5). Visualization of the layout detection results is critical for both presentation and debugging. LayoutParser is built with an integrated API for displaying the layout information along with the original document image. Shown in Figure 3, it enables presenting layout data with rich meta information and features in di\ufb00erent modes. 
More detailed information can be found in the online LayoutParser documentation page.", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 8, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "b3c9f96506599f418cc474db4adb5b0d", + "text": "3.5 Customized Model Training", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 8, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "1a011956c708d01abb2c058ec28c126f", + "text": "Besides the o\ufb00-the-shelf library, LayoutParser is also highly customizable with supports for highly unique and challenging document analysis tasks. Target document images can be vastly di\ufb00erent from the existing datasets for train- ing layout models, which leads to low layout detection accuracy. 
Training data", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 8, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "05f6e98c538d1912459b1e568871e6c2", + "text": "14 https://altoxml.github.io", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 8, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "ListItem", + "element_id": "c069937e6c2bfc0f856835f3af4d6181", + "text": "LayoutParser: A Uni\ufb01ed Toolkit for DL-Based DIA", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 9, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Image", + "element_id": "f5450580bd9ae07f4cdf7c23a6ccaf41", + "text": "x09 Burpunog uayor Aeydsiq 1 vondo 10g Guypunog usyoy apir:z uondo Mode I: Showing Layout on the Original Image Mode Il: Drawing OCR'd Text at the Correspoding Position", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 9, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "fadd4ad54cd14e3e4711d41a1c99f813", + "text": "Fig. 
3: Layout detection and OCR results visualization generated by the LayoutParser APIs. Mode I directly overlays the layout region bounding boxes and categories over the original image. Mode II recreates the original document via drawing the OCR\u2019d texts at their corresponding positions on the image canvas. In this \ufb01gure, tokens in textual regions are \ufb01ltered using the API and then displayed.", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 9, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "625c9e1d41a9740f094041595f79953d", + "text": "can also be highly sensitive and not sharable publicly. To overcome these chal- lenges, LayoutParser is built with rich features for e\ufb03cient data annotation and customized model training.", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 9, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "a3498730b5cd3fe9405fad69bcf37882", + "text": "LayoutParser incorporates a toolkit optimized for annotating document lay- outs using object-level active learning [32]. With the help from a layout detection model trained along with labeling, only the most important layout objects within each image, rather than the whole image, are required for labeling. The rest of the regions are automatically annotated with high con\ufb01dence predictions from the layout detection model. 
This allows a layout dataset to be created more e\ufb03ciently with only around 60% of the labeling budget.", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 9, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "c4ccf2cf2e7495668221cbe51534f90b", + "text": "After the training dataset is curated, LayoutParser supports di\ufb00erent modes for training the layout models. Fine-tuning can be used for training models on a small newly-labeled dataset by initializing the model with existing pre-trained weights. Training from scratch can be helpful when the source dataset and target are signi\ufb01cantly di\ufb00erent and a large training set is available. However, as suggested in Studer et al.\u2019s work[33], loading pre-trained weights on large-scale datasets like ImageNet [5], even from totally di\ufb00erent domains, can still boost model performance. Through the integrated API provided by LayoutParser, users can easily compare model performances on the benchmark datasets.", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 9, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "ListItem", + "element_id": "59c95b02b488f297417af4125e4ac316", + "text": "10 Z. 
Shen et al.", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 10, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Image", + "element_id": "6eb34afad9d568fbccde8ac8854dc24d", + "text": "Intra-column reading order Token Categories tie (Adress 2) tee (NE sumber Variable Column reading order HEE company type Column Categories (J tite Adress _] ree [7] Section Header Maximum Allowed Height (b) Illustration of the recreated document with dense text structure for better OCR performance", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 10, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "ebbb8c84b2a69f817c8ae7df20d72dd9", + "text": "Fig. 4: Illustration of (a) the original historical Japanese document with layout detection results and (b) a recreated version of the document image that achieves much better character recognition recall. 
The reorganization algorithm rearranges the tokens based on the their detected bounding boxes given a maximum allowed height.", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 10, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "88f6e589165656eceebf898d0240e05c", + "text": "4 LayoutParser Community Platform", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 10, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "e9a86eb57ba5483acfeefb0e931402b1", + "text": "Another focus of LayoutParser is promoting the reusability of layout detection models and full digitization pipelines. Similar to many existing deep learning libraries, LayoutParser comes with a community model hub for distributing layout models. End-users can upload their self-trained models to the model hub, and these models can be loaded into a similar interface as the currently available LayoutParser pre-trained models. 
For example, the model trained on the News Navigator dataset [17] has been incorporated in the model hub.", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 10, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "c08c76705396fe7a65be5dff6d3bffd5", + "text": "Beyond DL models, LayoutParser also promotes the sharing of entire doc- ument digitization pipelines. For example, sometimes the pipeline requires the combination of multiple DL models to achieve better accuracy. Currently, pipelines are mainly described in academic papers and implementations are often not pub- licly available. To this end, the LayoutParser community platform also enables the sharing of layout pipelines to promote the discussion and reuse of techniques. For each shared pipeline, it has a dedicated project page, with links to the source code, documentation, and an outline of the approaches. A discussion panel is provided for exchanging ideas. 
Combined with the core LayoutParser library, users can easily build reusable components based on the shared pipelines and apply them to solve their unique problems.", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 10, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "53da8301ac140e0b72cdcf6a7f405918", + "text": "5 Use Cases", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 10, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "1fd6bf73b6c80f8ed034bf977fba5a67", + "text": "The core objective of LayoutParser is to make it easier to create both large-scale and light-weight document digitization pipelines. 
Large-scale document processing", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 10, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "ListItem", + "element_id": "ab543398222da25b3a9231929162d3a0", + "text": "LayoutParser: A Uni\ufb01ed Toolkit for DL-Based DIA", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 11, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "4b9eddb71426681f2828832312457b67", + "text": "focuses on precision, e\ufb03ciency, and robustness. The target documents may have complicated structures, and may require training multiple layout detection models to achieve the optimal accuracy. Light-weight pipelines are built for relatively simple documents, with an emphasis on development ease, speed and \ufb02exibility. Ideally one only needs to use existing resources, and model training should be avoided. Through two exemplar projects, we show how practitioners in both academia and industry can easily build such pipelines using LayoutParser and extract high-quality structured document data for their downstream tasks. 
The source code for these projects will be publicly available in the LayoutParser community hub.", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 11, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "54ee49eac3f4e6098811cda1f9dd0306", + "text": "5.1 A Comprehensive Historical Document Digitization Pipeline", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 11, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "07a7dcb89bd4b78209da5f28e2877a1a", + "text": "The digitization of historical documents can unlock valuable data that can shed light on many important social, economic, and historical questions. Yet due to scan noises, page wearing, and the prevalence of complicated layout structures, ob- taining a structured representation of historical document scans is often extremely complicated. In this example, LayoutParser was used to develop a comprehensive pipeline, shown in Figure 5, to gener- ate high-quality structured data from historical Japanese \ufb01rm \ufb01nancial ta- bles with complicated layouts. 
The pipeline applies two layout models to identify di\ufb00erent levels of document structures and two customized OCR engines for optimized character recog- nition accuracy.", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 11, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Image", + "element_id": "02890858b0555e354a0336addbf54a7e", + "text": "(spe peepee, \u2018Active Learning Layout Annotate Layout Dataset | + \u2018Annotation Toolkit \u00a5 a Deep Leaming Layout Model Training & Inference, \u00a5 ; Handy Data Structures & Post-processing El Apis for Layout Det a LAR ror tye eats) 4 Text Recognition | <\u2014\u2014 Default ane Customized \u00a5 ee Layout Structure Visualization & Export | <\u2014\u2014 | visualization & Storage The Japanese Document Helpful LayoutParser Digitization Pipeline Modules", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 11, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "42551c9b40827dcdc52055b4d25c6fc3", + "text": "As shown in Figure 4 (a), the document contains columns of text written vertically 15, a common style in Japanese. Due to scanning noise and archaic printing technology, the columns can be skewed or have vari- able widths, and hence cannot be eas- ily identi\ufb01ed via rule-based methods. 
Within each column, words are sepa- rated by white spaces of variable size, and the vertical positions of objects can be an indicator of their layout type.", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 11, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "e55432edc0ceae5fae7bedae3bd560c6", + "text": "Fig. 5: Illustration of how LayoutParser helps with the historical document digi- tization pipeline.", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 11, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "ListItem", + "element_id": "17e31eed33c4abb58af55aa5c6e0b7b0", + "text": "15 A document page consists of eight rows like this. For simplicity we skip the row segmentation discussion and refer readers to the source code when available.", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 11, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "9d917f215b0115c679105482b80d2d2d", + "text": "12 Z. 
Shen et al.", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 12, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "4bba0fe5b17811e76afbf7650f2f6792", + "text": "To decipher the complicated layout", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 12, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "7e1f7b138c864ed8b40cf0f3d38801ec", + "text": "structure, two object detection models have been trained to recognize individual columns and tokens, respectively. A small training set (400 images with approxi- mately 100 annotations each) is curated via the active learning based annotation tool [32] in LayoutParser. The models learn to identify both the categories and regions for each token or column via their distinct visual features. The layout data structure enables easy grouping of the tokens within each column, and rearranging columns to achieve the correct reading orders based on the horizontal position. Errors are identi\ufb01ed and recti\ufb01ed via checking the consistency of the model predictions. 
Therefore, though trained on a small dataset, the pipeline achieves a high level of layout detection accuracy: it achieves a 96.97 AP [19] score across 5 categories for the column detection model, and a 89.23 AP across 4 categories for the token detection model.", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 12, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "dccaa93e7bae24dedf523dd39575dfbe", + "text": "A combination of character recognition methods is developed to tackle the unique challenges in this document. In our experiments, we found that irregular spacing between the tokens led to a low character recognition recall rate, whereas existing OCR models tend to perform better on densely-arranged texts. To overcome this challenge, we create a document reorganization algorithm that rearranges the text based on the token bounding boxes detected in the layout analysis step. Figure 4 (b) illustrates the generated image of dense text, which is sent to the OCR APIs as a whole to reduce the transaction costs. 
The \ufb02exible coordinate system in LayoutParser is used to transform the OCR results relative to their original positions on the page.", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 12, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "60c2e2147d0b0dbd576d51b71a95a2ef", + "text": "Additionally, it is common for historical documents to use unique fonts with di\ufb00erent glyphs, which signi\ufb01cantly degrades the accuracy of OCR models trained on modern texts. In this document, a special \ufb02at font is used for printing numbers and could not be detected by o\ufb00-the-shelf OCR engines. Using the highly \ufb02exible functionalities from LayoutParser, a pipeline approach is constructed that achieves a high recognition accuracy with minimal e\ufb00ort. As the characters have unique visual structures and are usually clustered together, we train the layout model to identify number regions with a dedicated category. Subsequently, LayoutParser crops images within these regions, and identi\ufb01es characters within them using a self-trained OCR model based on a CNN-RNN [6]. 
The model detects a total of 15 possible categories, and achieves a 0.98 Jaccard score16 and a 0.17 average Levinstein distances17 for token prediction on the test set.", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 12, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "de9e855638523c5f77ed4070813e37a3", + "text": "Overall, it is possible to create an intricate and highly accurate digitization pipeline for large-scale digitization using LayoutParser. The pipeline avoids specifying the complicated rules used in traditional methods, is straightforward to develop, and is robust to outliers. The DL models also generate \ufb01ne-grained results that enable creative approaches like page reorganization for OCR.", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 12, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "ListItem", + "element_id": "a375a901ba62c168a96725c055b47bad", + "text": "16 This measures the overlap between the detected and ground-truth characters, and the maximum is 1.", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 12, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "ListItem", + "element_id": "184a3abfd34e7aa04632979ee3c2de36", + "text": "17 This measures the number of edits from the ground-truth text to 
the predicted text, and lower is better.", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 12, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "ListItem", + "element_id": "2b7101f39954d5301166b82906202ea9", + "text": "LayoutParser: A Uni\ufb01ed Toolkit for DL-Based DIA", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 13, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Image", + "element_id": "d5c954ff619e348d36d5180feedabc6c", + "text": "(@) Partial table at the bottom (&) Full page table (6) Partial table at the top (d) Mis-detected tet line", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 13, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "FigureCaption", + "element_id": "7e685908875164adafa447ec3d97455e", + "text": "Fig. 6: This lightweight table detector can identify tables (outlined in red) and cells (shaded in blue) in di\ufb00erent locations on a page. 
In very few cases (d), it might generate minor error predictions, e.g, failing to capture the top text line of a table.", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 13, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "60e4fa05c78628ec1c6fa6003b86b52e", + "text": "5.2 A light-weight Visual Table Extractor", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 13, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "445ad333fa3f7f85d2be634fbdeeb72a", + "text": "Detecting tables and parsing their structures (table extraction) are of central im- portance for many document digitization tasks. Many previous works [26, 30, 27] and tools 18 have been developed to identify and parse table structures. Yet they might require training complicated models from scratch, or are only applicable for born-digital PDF documents. 
In this section, we show how LayoutParser can help build a light-weight accurate visual table extractor for legal docket tables using the existing resources with minimal e\ufb00ort.", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 13, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "923b62eb8550ec49cf6d3f2e6bac7ec8", + "text": "The extractor uses a pre-trained layout detection model for identifying the table regions and some simple rules for pairing the rows and the columns in the PDF image. Mask R-CNN [12] trained on the PubLayNet dataset [38] from the LayoutParser Model Zoo can be used for detecting table regions. By \ufb01ltering out model predictions of low con\ufb01dence and removing overlapping predictions, LayoutParser can identify the tabular regions on each page, which signi\ufb01cantly simpli\ufb01es the subsequent steps. By applying the line detection functions within the tabular segments, provided in the utility module from LayoutParser, the pipeline can identify the three distinct columns in the tables. A row clustering method is then applied via analyzing the y coordinates of token bounding boxes in the left-most column, which are obtained from the OCR engines. A non-maximal suppression algorithm is used to remove duplicated rows with extremely small gaps. Shown in Figure 6, the built pipeline can detect tables at di\ufb00erent positions on a page accurately. 
Continued tables from di\ufb00erent pages are concatenated, and a structured table representation has been easily created.", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 13, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "abb8837ab23e7fcaa35969b0119bcc2e", + "text": "18 https://github.com/atlanhq/camelot, https://github.com/tabulapdf/tabula", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 13, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "ListItem", + "element_id": "cf4d8c7a6177bda0ced6458173952021", + "text": "14 Z. 
Shen et al.", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 14, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "030602b79a8995e9a457b875d94c016d", + "text": "6 Conclusion", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 14, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "e79cef57c86050aa5fc74e5cd3923197", + "text": "LayoutParser provides a comprehensive toolkit for deep learning-based document image analysis. The o\ufb00-the-shelf library is easy to install, and can be used to build \ufb02exible and accurate pipelines for processing documents with complicated structures. It also supports high-level customization and enables easy labeling and training of DL models on unique document image datasets. The LayoutParser community platform facilitates sharing DL models and DIA pipelines, inviting discussion and promoting code reproducibility and reusability. 
The LayoutParser team is committed to keeping the library updated continuously and bringing the most recent advances in DL-based DIA, such as multi-modal document modeling [37, 36, 9] (an upcoming priority), to a diverse audience of end-users.", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 14, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "dd3013dfc8b3df79d89268894b10571e", + "text": "Acknowledgements We thank the anonymous reviewers for their comments and suggestions. This project is supported in part by NSF Grant OIA-2033558 and funding from the Harvard Data Science Initiative and Harvard Catalyst. Zejiang Shen thanks Doug Downey for suggestions.", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 14, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "58c1b97c7988d78495527a0726a7c85a", + "text": "References", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 14, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "ListItem", + "element_id": "85e09a5617e58a3a78b22fd12eb29eaf", + "text": "[1] Abadi, M., Agarwal, A., Barham, P., Brevdo, E., Chen, Z., Citro, C., Corrado, G.S., Davis, A., Dean, J., Devin, M., Ghemawat, S., Goodfellow, I., Harp, A., Irving, G., Isard, M., Jia, Y., Jozefowicz, R., 
Kaiser, L., Kudlur, M., Levenberg, J., Man\u00b4e, D., Monga, R., Moore, S., Murray, D., Olah, C., Schuster, M., Shlens, J., Steiner, B., Sutskever, I., Talwar, K., Tucker, P., Vanhoucke, V., Vasudevan, V., Vi\u00b4egas, F., Vinyals, O., Warden, P., Wattenberg, M., Wicke, M., Yu, Y., Zheng, X.: TensorFlow: Large-scale machine learning on heterogeneous systems (2015), https://www.tensorflow.org/, software available from tensor\ufb02ow.org", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 14, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "ListItem", + "element_id": "ad466edc2a12c9be4bf951fd8b5bf818", + "text": "[2] Alberti, M., Pondenkandath, V., W\u00a8ursch, M., Ingold, R., Liwicki, M.: Deepdiva: a highly-functional python framework for reproducible experiments. In: 2018 16th International Conference on Frontiers in Handwriting Recognition (ICFHR). pp. 423\u2013428. IEEE (2018)", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 14, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "ListItem", + "element_id": "217777f3d44620afddc1e27553e81a66", + "text": "[3] Antonacopoulos, A., Bridson, D., Papadopoulos, C., Pletschacher, S.: A realistic dataset for performance evaluation of document layout analysis. In: 2009 10th International Conference on Document Analysis and Recognition. pp. 296\u2013300. 
IEEE (2009)", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 14, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "ListItem", + "element_id": "292dd088dc6a174159395e31be7755d7", + "text": "[4] Baek, Y., Lee, B., Han, D., Yun, S., Lee, H.: Character region awareness for text detection. In: Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition. pp. 9365\u20139374 (2019)", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 14, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "ListItem", + "element_id": "52a5a7f582c381ec8c7c1abc7e983191", + "text": "[5] Deng, J., Dong, W., Socher, R., Li, L.J., Li, K., Fei-Fei, L.: ImageNet: A Large-Scale Hierarchical Image Database. In: CVPR09 (2009)", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 14, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "ListItem", + "element_id": "4e93c51c89970349aa9e0a42cb330c4b", + "text": "[6] Deng, Y., Kanervisto, A., Ling, J., Rush, A.M.: Image-to-markup generation with coarse-to-\ufb01ne attention. In: International Conference on Machine Learning. pp. 980\u2013989. 
PMLR (2017)", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 14, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "ListItem", + "element_id": "8cfd166d282469f765423faae44271e2", + "text": "[7] Ganin, Y., Lempitsky, V.: Unsupervised domain adaptation by backpropagation. In: International conference on machine learning. pp. 1180\u20131189. PMLR (2015)", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 14, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "ListItem", + "element_id": "8bce49aab693aad97676011688f3f6f3", + "text": "LayoutParser: A Uni\ufb01ed Toolkit for DL-Based DIA", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 15, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "8605ad66ac8429ae6e92841d5026f0de", + "text": "Gardner, M., Grus, J., Neumann, M., Tafjord, O., Dasigi, P., Liu, N., Peters, M., Schmitz, M., Zettlemoyer, L.: Allennlp: A deep semantic natural language processing platform. 
arXiv preprint arXiv:1803.07640 (2018) Lukasz Garncarek, Powalski, R., Stanistawek, T., Topolski, B., Halama, P., Graliriski, F.: Lambert: Layout-aware (language) modeling using bert for in- formation extraction (2020)", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 15, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "ListItem", + "element_id": "39972987462975e72ff97f3cc3d28223", + "text": "[10] Graves, A., Fern\u00b4andez, S., Gomez, F., Schmidhuber, J.: Connectionist temporal classi\ufb01cation: labelling unsegmented sequence data with recurrent neural networks. In: Proceedings of the 23rd international conference on Machine learning. pp. 369\u2013376 (2006)", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 15, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "559ea792f7c0c98e4af9e3436774efa9", + "text": "[11] Harley, A.W., Ufkes, A., Derpanis, K.G.: Evaluation of deep convolutional nets for document image classi\ufb01cation and retrieval. In: 2015 13th International Conference on Document Analysis and Recognition (ICDAR). pp. 991\u2013995. IEEE (2015) [12] He, K., Gkioxari, G., Doll\u00b4ar, P., Girshick, R.: Mask r-cnn. 
In: Proceedings of the", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 15, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "ListItem", + "element_id": "a25accb47954c56b35a06609449901ef", + "text": "IEEE international conference on computer vision. pp. 2961\u20132969 (2017)", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 15, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "ListItem", + "element_id": "616320116770187bb631e2bcabdc44fe", + "text": "[13] He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition. pp. 770\u2013778 (2016)", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 15, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "ListItem", + "element_id": "8ead02f7720d59492ca67a5cfddd4552", + "text": "[14] Kay, A.: Tesseract: An open-source optical character recognition engine. Linux J. 
2007(159), 2 (Jul 2007)", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 15, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "ListItem", + "element_id": "ccf2aef698df297baac645bfbe87b5a4", + "text": "[15] Lamiroy, B., Lopresti, D.: An open architecture for end-to-end document analysis benchmarking. In: 2011 International Conference on Document Analysis and Recognition. pp. 42\u201347. IEEE (2011)", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 15, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "ListItem", + "element_id": "7303875a4141fe55ab6c6538d2660269", + "text": "[16] Lee, B.C., Weld, D.S.: Newspaper navigator: Open faceted search for 1.5 million images. In: Adjunct Publication of the 33rd Annual ACM Sym- posium on User Interface Software and Technology. p. 120\u2013122. UIST \u201920 Adjunct, Association for Computing Machinery, New York, NY, USA (2020). https://doi.org/10.1145/3379350.3416143, https://doi-org.offcampus. 
lib.washington.edu/10.1145/3379350.3416143", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 15, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "ListItem", + "element_id": "484bdc79ca505343715e3d177bd17275", + "text": "[17] Lee, B.C.G., Mears, J., Jakeway, E., Ferriter, M., Adams, C., Yarasavage, N., Thomas, D., Zwaard, K., Weld, D.S.: The Newspaper Navigator Dataset: Extracting Headlines and Visual Content from 16 Million Historic Newspaper Pages in Chronicling America, p. 3055\u20133062. Association for Computing Machinery, New York, NY, USA (2020), https://doi.org/10.1145/3340531.3412767", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 15, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "ListItem", + "element_id": "d3a921d79a30615dcf174c93d2da8d4d", + "text": "[18] Li, M., Cui, L., Huang, S., Wei, F., Zhou, M., Li, Z.: Tablebank: Table benchmark for image-based table detection and recognition. 
arXiv preprint arXiv:1903.01949 (2019)", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 15, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "ListItem", + "element_id": "8eea8c964496b9e3de3099a9af798467", + "text": "[19] Lin, T.Y., Maire, M., Belongie, S., Hays, J., Perona, P., Ramanan, D., Doll\u00b4ar, P., Zitnick, C.L.: Microsoft coco: Common objects in context. In: European conference on computer vision. pp. 740\u2013755. Springer (2014)", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 15, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "ListItem", + "element_id": "de8aee29b21c13139f4875a90a52d0a0", + "text": "[20] Long, J., Shelhamer, E., Darrell, T.: Fully convolutional networks for semantic segmentation. In: Proceedings of the IEEE conference on computer vision and pattern recognition. pp. 3431\u20133440 (2015)", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 15, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "ListItem", + "element_id": "bce47bb8dec257c966d948be79e80094", + "text": "[21] Neudecker, C., Schlarb, S., Dogan, Z.M., Missier, P., Su\ufb01, S., Williams, A., Wolsten- croft, K.: An experimental work\ufb02ow development platform for historical document digitisation and analysis. 
In: Proceedings of the 2011 workshop on historical document imaging and processing. pp. 161\u2013168 (2011)", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 15, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "ListItem", + "element_id": "7a372cbcf79efc9cc23d35644816ca15", + "text": "[22] Oliveira, S.A., Seguin, B., Kaplan, F.: dhsegment: A generic deep-learning approach for document segmentation. In: 2018 16th International Conference on Frontiers in Handwriting Recognition (ICFHR). pp. 7\u201312. IEEE (2018)", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 15, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "ListItem", + "element_id": "2125d675311c00e01a24886e8a603ee1", + "text": "16 Z. Shen et al.", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 16, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "e5e88c91dcc8703ef7ffaf69fe565020", + "text": "[23] Paszke, A., Gross, S., Chintala, S., Chanan, G., Yang, E., DeVito, Z., Lin, Z., Desmaison, A., Antiga, L., Lerer, A.: Automatic di\ufb00erentiation in pytorch (2017) [24] Paszke, A., Gross, S., Massa, F., Lerer, A., Bradbury, J., Chanan, G., Killeen, T., Lin, Z., Gimelshein, N., Antiga, L., et al.: Pytorch: An imperative style, high-performance deep learning library. 
arXiv preprint arXiv:1912.01703 (2019) [25] Pletschacher, S., Antonacopoulos, A.: The page (page analysis and ground-truth elements) format framework. In: 2010 20th International Conference on Pattern Recognition. pp. 257\u2013260. IEEE (2010)", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 16, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "ListItem", + "element_id": "a647b5ee9dfd11735b912b0510f476a1", + "text": "[26] Prasad, D., Gadpal, A., Kapadni, K., Visave, M., Sultanpure, K.: Cascadetabnet: An approach for end to end table detection and structure recognition from image- based documents. In: Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition Workshops. pp. 572\u2013573 (2020)", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 16, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "ListItem", + "element_id": "70a42a501297733d90dbcae55dbc2b78", + "text": "[27] Qasim, S.R., Mahmood, H., Shafait, F.: Rethinking table recognition using graph neural networks. In: 2019 International Conference on Document Analysis and Recognition (ICDAR). pp. 142\u2013147. 
IEEE (2019)", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 16, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "ListItem", + "element_id": "3d9af66828b6b1e385e04dcad340e403", + "text": "[28] Ren, S., He, K., Girshick, R., Sun, J.: Faster r-cnn: Towards real-time object detection with region proposal networks. In: Advances in neural information processing systems. pp. 91\u201399 (2015)", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 16, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "ff7c339e3258376076b2f515c6b0f01e", + "text": "[29] Scarselli, F., Gori, M., Tsoi, A.C., Hagenbuchner, M., Monfardini, G.: The graph neural network model. IEEE transactions on neural networks 20(1), 61\u201380 (2008) [30] Schreiber, S., Agne, S., Wolf, I., Dengel, A., Ahmed, S.: Deepdesrt: Deep learning for detection and structure recognition of tables in document images. In: 2017 14th IAPR international conference on document analysis and recognition (ICDAR). vol. 1, pp. 1162\u20131167. 
IEEE (2017)", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 16, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "ListItem", + "element_id": "410d64198e29b695d48db2cd3781daae", + "text": "[31] Shen, Z., Zhang, K., Dell, M.: A large dataset of historical japanese documents with complex layouts. In: Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition Workshops. pp. 548\u2013549 (2020)", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 16, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "ListItem", + "element_id": "fc8457575ed11e22f45c936aba277303", + "text": "[32] Shen, Z., Zhao, J., Dell, M., Yu, Y., Li, W.: Olala: Object-level active learning based layout annotation. arXiv preprint arXiv:2010.01762 (2020)", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 16, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "ListItem", + "element_id": "b66f47222b34c59b619b0f90b165b093", + "text": "[33] Studer, L., Alberti, M., Pondenkandath, V., Goktepe, P., Kolonko, T., Fischer, A., Liwicki, M., Ingold, R.: A comprehensive study of imagenet pre-training for historical document image analysis. In: 2019 International Conference on Document Analysis and Recognition (ICDAR). pp. 720\u2013725. 
IEEE (2019)", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 16, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "93eb7c029c0a6d8353aba82f5f2d389d", + "text": "[34] Wolf, T., Debut, L., Sanh, V., Chaumond, J., Delangue, C., Moi, A., Cistac, P., Rault, T., Louf, R., Funtowicz, M., et al.: Huggingface\u2019s transformers: State-of- the-art natural language processing. arXiv preprint arXiv:1910.03771 (2019) [35] Wu, Y., Kirillov, A., Massa, F., Lo, W.Y., Girshick, R.: Detectron2. https://", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 16, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "ListItem", + "element_id": "ba70589bb3f48ccf6e18724702cc1f10", + "text": "github.com/facebookresearch/detectron2 (2019)", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 16, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "ListItem", + "element_id": "a8ce4311d30f1f7cba9043e30c9ad6d1", + "text": "[36] Xu, Y., Xu, Y., Lv, T., Cui, L., Wei, F., Wang, G., Lu, Y., Florencio, D., Zhang, C., Che, W., et al.: Layoutlmv2: Multi-modal pre-training for visually-rich document understanding. 
arXiv preprint arXiv:2012.14740 (2020)", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 16, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "ListItem", + "element_id": "a9acaa0d527f89ed3f3c7daac7694a23", + "text": "[37] Xu, Y., Li, M., Cui, L., Huang, S., Wei, F., Zhou, M.: Layoutlm: Pre-training of text and layout for document image understanding (2019)", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 16, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "ListItem", + "element_id": "b0e2d232fd257ee8ca691ff77b74fcee", + "text": "[38] Zhong, X., Tang, J., Yepes, A.J.: Publaynet: largest dataset ever for doc- In: 2019 International Conference on Document IEEE (Sep 2019). ument Analysis and Recognition (ICDAR). pp. 1015\u20131022. 
https://doi.org/10.1109/ICDAR.2019.00166 layout analysis.", + "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 16, + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + } +] \ No newline at end of file diff --git a/test_unstructured_ingest/expected-structured-output/local-single-file-with-pdf-infer-table-structure/layout-parser-paper.pdf.json b/test_unstructured_ingest/expected-structured-output/local-single-file-with-pdf-infer-table-structure/layout-parser-paper.pdf.json deleted file mode 100644 index ec397366ad..0000000000 --- a/test_unstructured_ingest/expected-structured-output/local-single-file-with-pdf-infer-table-structure/layout-parser-paper.pdf.json +++ /dev/null @@ -1,3326 +0,0 @@ -[ - { - "element_id": "d3ce55f220dfb75891b4394a18bcb973", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "1 2 0 2", - "type": "UncategorizedText" - }, - { - "element_id": "d8294655784148f3059eb08db918977c", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "n u J 1 2 ] V C . 
s c [", - "type": "Header" - }, - { - "element_id": "c0cdc594eccc53cfb75eeef0ad75b65b", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "2 v 8 4 3 5 1 . 3 0 1 2 : v i X r a", - "type": "UncategorizedText" - }, - { - "element_id": "4467e9baee9456824c1aa679526f6979", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "LayoutParser: A Unified Toolkit for Deep Learning Based Document Image Analysis", - "type": "Title" - }, - { - "element_id": "33dff5d4db499a435f61220a890d3f04", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "Zejiang Shen! 
(4), Ruochen Zhang”, Melissa Dell?, Benjamin Charles Germain Lee*, Jacob Carlson’, and Weining Li>", - "type": "NarrativeText" - }, - { - "element_id": "a0bbb18d9710661eb9e2aa6e651e6555", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "1 Allen Institute for AI shannons@allenai.org 2 Brown University ruochen zhang@brown.edu 3 Harvard University {melissadell,jacob carlson}@fas.harvard.edu 4 University of Washington bcgl@cs.washington.edu 5 University of Waterloo w422li@uwaterloo.ca", - "type": "NarrativeText" - }, - { - "element_id": "ea0cc468a56f4af65b968fb86913bdfe", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "Abstract. Recent advances in document image analysis (DIA) have been primarily driven by the application of neural networks. Ideally, research outcomes could be easily deployed in production and extended for further investigation. However, various factors like loosely organized codebases and sophisticated model configurations complicate the easy reuse of im- portant innovations by a wide audience. Though there have been on-going efforts to improve reusability and simplify deep learning (DL) model development in disciplines like natural language processing and computer vision, none of them are optimized for challenges in the domain of DIA. This represents a major gap in the existing toolkit, as DIA is central to academic research across a wide range of disciplines in the social sciences and humanities. 
This paper introduces LayoutParser, an open-source library for streamlining the usage of DL in DIA research and applica- tions. The core LayoutParser library comes with a set of simple and intuitive interfaces for applying and customizing DL models for layout de- tection, character recognition, and many other document processing tasks. To promote extensibility, LayoutParser also incorporates a community platform for sharing both pre-trained models and full document digiti- zation pipelines. We demonstrate that LayoutParser is helpful for both lightweight and large-scale digitization pipelines in real-word use cases. The library is publicly available at https://layout-parser.github.io.", - "type": "NarrativeText" - }, - { - "element_id": "370e1b61d1dab8ae35d62eb6f42feceb", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "Keywords: Document Image Analysis · Deep Learning · Layout Analysis · Character Recognition · Open Source library · Toolkit.", - "type": "NarrativeText" - }, - { - "element_id": "f12febfe29a59a8e4ce6b3494d6deb8a", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "Introduction", - "type": "Title" - }, - { - "element_id": "fd81374ba214b5472d0b60b2371ae8df", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 
- }, - "text": "Deep Learning(DL)-based approaches are the state-of-the-art for a wide range of document image analysis (DIA) tasks including document image classification [11,", - "type": "NarrativeText" - }, - { - "element_id": "e7e0acf197e89d650d39fa3ce929509e", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "2 Z. Shen et al.", - "type": "Title" - }, - { - "element_id": "4b097cc42d7d30e720512dbce0cb4905", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "37], layout detection [38, 22], table detection [26], and scene text detection [4]. A generalized learning-based framework dramatically reduces the need for the manual specification of complicated rules, which is the status quo with traditional methods. DL has the potential to transform DIA pipelines and benefit a broad spectrum of large-scale document digitization projects.", - "type": "NarrativeText" - }, - { - "element_id": "45844a4901777afaf6de9a0994e017eb", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "However, there are several practical difficulties for taking advantages of re- cent advances in DL-based methods: 1) DL models are notoriously convoluted for reuse and extension. 
Existing models are developed using distinct frame- works like TensorFlow [1] or PyTorch [24], and the high-level parameters can be obfuscated by implementation details [8]. It can be a time-consuming and frustrating experience to debug, reproduce, and adapt existing models for DIA, and many researchers who would benefit the most from using these methods lack the technical background to implement them from scratch. 2) Document images contain diverse and disparate patterns across domains, and customized training is often required to achieve a desirable detection accuracy. Currently there is no full-fledged infrastructure for easily curating the target document image datasets and fine-tuning or re-training the models. 3) DIA usually requires a sequence of models and other processing to obtain the final outputs. Often research teams use DL models and then perform further document analyses in separate processes, and these pipelines are not documented in any central location (and often not documented at all). This makes it difficult for research teams to learn about how full pipelines are implemented and leads them to invest significant resources in reinventing the DIA wheel.", - "type": "NarrativeText" - }, - { - "element_id": "6f3c8d55dd5a4f95d8a59d146ca9ffa7", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "LayoutParser provides a unified toolkit to support DL-based document image analysis and processing. 
To address the aforementioned challenges, LayoutParser is built with the following components:", - "type": "NarrativeText" - }, - { - "element_id": "9ce12a49c1a9972b4cd2c3f66595b2b6", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "1. An off-the-shelf toolkit for applying DL models for layout detection, character recognition, and other DIA tasks (Section 3)", - "type": "ListItem" - }, - { - "element_id": "40f42a96bdd1559e09d74090c0fe9df3", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "2. A rich repository of pre-trained neural network models (Model Zoo) that underlies the off-the-shelf usage", - "type": "ListItem" - }, - { - "element_id": "0ca448d3ae0c4ee73bf46e8edfcd417d", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "3. 
Comprehensive tools for efficient document image data annotation and model tuning to support different levels of customization", - "type": "ListItem" - }, - { - "element_id": "7a9de9b00d51bd670feccc2eb84a147e", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "4. A DL model hub and community platform for the easy sharing, distribu- tion, and discussion of DIA models and pipelines, to promote reusability, reproducibility, and extensibility (Section 4)", - "type": "ListItem" - }, - { - "element_id": "8e216e91ff3471241858f1df445cdf0a", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "The library implements simple and intuitive Python APIs without sacrificing generalizability and versatility, and can be easily installed via pip. Its convenient functions for handling document image data can be seamlessly integrated with existing DIA pipelines. 
With detailed documentations and carefully curated tutorials, we hope this tool will benefit a variety of end-users, and will lead to advances in applications in both industry and academic research.", - "type": "NarrativeText" - }, - { - "element_id": "583775f22c8080098beebbef960e2fbf", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "LayoutParser is well aligned with recent efforts for improving DL model reusability in other disciplines like natural language processing [8, 34] and com- puter vision [35], but with a focus on unique challenges in DIA. We show LayoutParser can be applied in sophisticated and large-scale digitization projects", - "type": "NarrativeText" - }, - { - "element_id": "f5a6697190c20bf6030d8e4ae8f6861a", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "LayoutParser: A Unified Toolkit for DL-Based DIA", - "type": "Header" - }, - { - "element_id": "50846086f4d9ece02052735686278699", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "that require precision, efficiency, and robustness, as well as simple and light- weight document processing tasks focusing on efficacy and flexibility (Section 5). 
LayoutParser is being actively maintained, and support for more deep learning models and novel methods in text-based layout analysis methods [37, 34] is planned.", - "type": "NarrativeText" - }, - { - "element_id": "0ce686208eb4aba70d0cd053d50c7bc2", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "The rest of the paper is organized as follows. Section 2 provides an overview of related work. The core LayoutParser library, DL Model Zoo, and customized model training are described in Section 3, and the DL model hub and commu- nity platform are detailed in Section 4. Section 5 shows two examples of how LayoutParser can be used in practical DIA projects, and Section 6 concludes.", - "type": "NarrativeText" - }, - { - "element_id": "1548efaaa18cf819f9498d76a0440316", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "2 Related Work", - "type": "Title" - }, - { - "element_id": "8153390c1bb8652313be64034531449e", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "Recently, various DL models and datasets have been developed for layout analysis tasks. The dhSegment [22] utilizes fully convolutional networks [20] for segmen- tation tasks on historical documents. 
Object detection-based methods like Faster R-CNN [28] and Mask R-CNN [12] are used for identifying document elements [38] and detecting tables [30, 26]. Most recently, Graph Neural Networks [29] have also been used in table detection [27]. However, these models are usually implemented individually and there is no unified framework to load and use such models.", - "type": "NarrativeText" - }, - { - "element_id": "45d6d494603e84706884918c1f785c9f", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "There has been a surge of interest in creating open-source tools for document image processing: a search of document image analysis in Github leads to 5M relevant code pieces 6; yet most of them rely on traditional rule-based methods or provide limited functionalities. The closest prior research to our work is the OCR-D project7, which also tries to build a complete toolkit for DIA. However, similar to the platform developed by Neudecker et al. [21], it is designed for analyzing historical documents, and provides no supports for recent DL models. The DocumentLayoutAnalysis project8 focuses on processing born-digital PDF documents via analyzing the stored PDF data. Repositories like DeepLayout9 and Detectron2-PubLayNet10 are individual deep learning models trained on layout analysis datasets without support for the full DIA pipeline. The Document Analysis and Exploitation (DAE) platform [15] and the DeepDIVA project [2] aim to improve the reproducibility of DIA methods (or DL models), yet they are not actively maintained. 
OCR engines like Tesseract [14], easyOCR11 and paddleOCR12 usually do not come with comprehensive functionalities for other DIA tasks like layout analysis.", - "type": "NarrativeText" - }, - { - "element_id": "73feaff827cbc7089d3f95d1e5aac6aa", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "Recent years have also seen numerous efforts to create libraries for promoting reproducibility and reusability in the field of DL. Libraries like Dectectron2 [35],", - "type": "NarrativeText" - }, - { - "element_id": "e55d055f6205d93e21c673d749264e7a", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "6 The number shown is obtained by specifying the search type as ‘code’. 
7 https://ocr-d.de/en/about 8 https://github.com/BobLd/DocumentLayoutAnalysis 9 https://github.com/leonlulu/DeepLayout 10 https://github.com/hpanwar08/detectron2 11 https://github.com/JaidedAI/EasyOCR 12 https://github.com/PaddlePaddle/PaddleOCR", - "type": "NarrativeText" - }, - { - "element_id": "fe238f610fe610b8ce1abaa08a0e3e63", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "4", - "type": "UncategorizedText" - }, - { - "element_id": "92c4289ad4af7c0793e40d5662707e0a", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "Z. Shen et al.", - "type": "NarrativeText" - }, - { - "element_id": "642416e5d6c99219b16dbba6f72392c5", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "Efficient Data Annotation Model Customization Document Images Community Platform ‘a >) ¥ DIA Model Hub i .) 
Customized Model Training] == | Layout Detection Models | ——= DIA Pipeline Sharing ~ OCR Module = { Layout Data stuctue ) = (storage Visualization VY", - "type": "Image" - }, - { - "element_id": "2172d9b276cd7a485dea4978805815d8", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "Fig. 1: The overall architecture of LayoutParser. For an input document image, the core LayoutParser library provides a set of off-the-shelf tools for layout detection, OCR, visualization, and storage, backed by a carefully designed layout data structure. LayoutParser also supports high level customization via efficient layout annotation and model training functions. These improve model accuracy on the target samples. The community platform enables the easy sharing of DIA models and whole digitization pipelines to promote reusability and reproducibility. A collection of detailed documentation, tutorials and exemplar projects make LayoutParser easy to learn and use.", - "type": "NarrativeText" - }, - { - "element_id": "b4948db85ca791e99aa92589fc41734f", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "AllenNLP [8] and transformers [34] have provided the community with complete DL-based support for developing and deploying models for general computer vision and natural language processing problems. LayoutParser, on the other hand, specializes specifically in DIA tasks. 
LayoutParser is also equipped with a community platform inspired by established model hubs such as Torch Hub [23] and TensorFlow Hub [1]. It enables the sharing of pretrained models as well as full document processing pipelines that are unique to DIA tasks.", - "type": "NarrativeText" - }, - { - "element_id": "7651db80014a85ab253367d3bd3e4f88", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "There have been a variety of document data collections to facilitate the development of DL models. Some examples include PRImA [3](magazine layouts), PubLayNet [38](academic paper layouts), Table Bank [18](tables in academic papers), Newspaper Navigator Dataset [16, 17](newspaper figure layouts) and HJDataset [31](historical Japanese document layouts). 
A spectrum of models trained on these datasets are currently available in the LayoutParser model zoo to support different use cases.", - "type": "NarrativeText" - }, - { - "element_id": "5a1838a8f40b4523094652cf14ab974c", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "3 The Core LayoutParser Library", - "type": "Title" - }, - { - "element_id": "47e45d28d96fc14ddc709835de35ece5", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "At the core of LayoutParser is an off-the-shelf toolkit that streamlines DL- based document image analysis. Five components support a simple interface with comprehensive functionalities: 1) The layout detection models enable using pre-trained or self-trained DL models for layout detection with just four lines of code. 
2) The detected layout information is stored in carefully engineered", - "type": "NarrativeText" - }, - { - "element_id": "cd1112d2b15a0d27a29b1c83b2afd0dd", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "LayoutParser: A Unified Toolkit for DL-Based DIA", - "type": "ListItem" - }, - { - "element_id": "0b9956dc7ccd1d758263217beda63196", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "Table 1: Current layout detection models in the LayoutParser model zoo", - "type": "FigureCaption" - }, - { - "element_id": "2a62c55be8401908c18140e858ec3345", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5, - "table_as_cells": [ - { - "content": "Dataset", - "h": 1, - "w": 1, - "x": 0, - "y": 0 - }, - { - "content": "PubLayNet B8]|", - "h": 1, - "w": 1, - "x": 0, - "y": 1 - }, - { - "content": "PRImA", - "h": 1, - "w": 1, - "x": 0, - "y": 2 - }, - { - "content": "Newspaper", - "h": 1, - "w": 1, - "x": 0, - "y": 3 - }, - { - "content": "TableBank", - "h": 1, - "w": 1, - "x": 0, - "y": 4 - }, - { - "content": "HJDataset", - "h": 1, - "w": 1, - "x": 0, - "y": 5 - }, - { - "content": "| Base Model'|", - "h": 1, - "w": 1, - "x": 1, - "y": 0 - }, - { - "content": "F/M", - "h": 1, - "w": 1, - "x": 1, - "y": 1 - }, - { - "content": "M", - 
"h": 1, - "w": 1, - "x": 1, - "y": 2 - }, - { - "content": "F", - "h": 1, - "w": 1, - "x": 1, - "y": 3 - }, - { - "content": "F", - "h": 1, - "w": 1, - "x": 1, - "y": 4 - }, - { - "content": "F/M", - "h": 1, - "w": 1, - "x": 1, - "y": 5 - }, - { - "content": "| Notes", - "h": 1, - "w": 1, - "x": 2, - "y": 0 - }, - { - "content": "Layouts of modern scientific documents", - "h": 1, - "w": 1, - "x": 2, - "y": 1 - }, - { - "content": "Layouts of scanned modern magazines and scientific report", - "h": 1, - "w": 1, - "x": 2, - "y": 2 - }, - { - "content": "Layouts of scanned US newspapers from the 20th century", - "h": 1, - "w": 1, - "x": 2, - "y": 3 - }, - { - "content": "Table region on modern scientific and business document", - "h": 1, - "w": 1, - "x": 2, - "y": 4 - }, - { - "content": "Layouts of history Japanese documents", - "h": 1, - "w": 1, - "x": 2, - "y": 5 - } - ], - "text_as_html": "
Dataset| Base Model'|| Notes
PubLayNet B8]|F/MLayouts of modern scientific documents
PRImAMLayouts of scanned modern magazines and scientific report
NewspaperFLayouts of scanned US newspapers from the 20th century
TableBankFTable region on modern scientific and business document
HJDatasetF/MLayouts of history Japanese documents
" - }, - "text": "Dataset Base Model1 Large Model Notes PubLayNet [38] PRImA [3] Newspaper [17] TableBank [18] HJDataset [31] F / M M F F F / M M - - F - Layouts of modern scientific documents Layouts of scanned modern magazines and scientific reports Layouts of scanned US newspapers from the 20th century Table region on modern scientific and business document Layouts of history Japanese documents", - "type": "Table" - }, - { - "element_id": "f978160527177fa39c13774ec8dfa9cb", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "1 For each dataset, we train several models of different sizes for different needs (the trade-off between accuracy vs. computational cost). For “base model” and “large model”, we refer to using the ResNet 50 or ResNet 101 backbones [13], respectively. One can train models of different architectures, like Faster R-CNN [28] (F) and Mask R-CNN [12] (M). For example, an F in the Large Model column indicates it has a Faster R-CNN model trained using the ResNet 101 backbone. The platform is maintained and a number of additions will be made to the model zoo in coming months.", - "type": "FigureCaption" - }, - { - "element_id": "55b33df7609960c3552a0b7bc1a5a9c6", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "layout data structures, which are optimized for efficiency and versatility. 3) When necessary, users can employ existing or customized OCR models via the unified API provided in the OCR module. 
4) LayoutParser comes with a set of utility functions for the visualization and storage of the layout data. 5) LayoutParser is also highly customizable, via its integration with functions for layout data annotation and model training. We now provide detailed descriptions for each component.", - "type": "NarrativeText" - }, - { - "element_id": "6e9df774416cc71548308e324b4bdbb7", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "3.1 Layout Detection Models", - "type": "Title" - }, - { - "element_id": "bbcc10c2b92de0cbdce8629f18b0d7ad", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "In LayoutParser, a layout model takes a document image as an input and generates a list of rectangular boxes for the target content regions. Different from traditional methods, it relies on deep convolutional neural networks rather than manually curated rules to identify content regions. It is formulated as an object detection problem and state-of-the-art models like Faster R-CNN [28] and Mask R-CNN [12] are used. This yields prediction results of high accuracy and makes it possible to build a concise, generalized interface for layout detection. 
LayoutParser, built upon Detectron2 [35], provides a minimal API that can perform layout detection with only four lines of code in Python:", - "type": "NarrativeText" - }, - { - "element_id": "a25137fdc995e079684174269dc0effa", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "1 import layoutparser as lp 2 image = cv2 . imread ( \" image_file \" ) # load images 3 model = lp . De t e c tro n2 Lay outM odel ( \" lp :// PubLayNet / f as t er _ r c nn _ R _ 50 _ F P N_ 3 x / config \" ) 4 5 layout = model . detect ( image )", - "type": "NarrativeText" - }, - { - "element_id": "59171bb0b4a32c9ec1b0e1d327ddb88f", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "LayoutParser provides a wealth of pre-trained model weights using various datasets covering different languages, time periods, and document types. Due to domain shift [7], the prediction performance can notably drop when models are ap- plied to target samples that are significantly different from the training dataset. As document structures and layouts vary greatly in different domains, it is important to select models trained on a dataset similar to the test samples. 
A semantic syntax is used for initializing the model weights in LayoutParser, using both the dataset name and model name lp:///.", - "type": "NarrativeText" - }, - { - "element_id": "676118b62c2261113a23a610c2ac50cb", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 6 - }, - "text": "6", - "type": "UncategorizedText" - }, - { - "element_id": "710ac103981c6363195774b02ee582d4", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 6 - }, - "text": "Z. Shen et al.", - "type": "NarrativeText" - }, - { - "element_id": "6eb2bb6ca50b3be177565f9ff546bce8", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 6 - }, - "text": "- ° . 3 a a 4 a 3 oo er ‘ 2 § 8 a 8 3 3 ‘ £ 4 A g a 9 ‘ 3 ¥ Coordinate g 4 5 3 + § 3 H Extra Features [O=\") [Bo] eaing i Text | | Type | | ower ° & a ¢ o [ coordinatel textblock1, 3 3 ’ g Q 3 , textblock2 , layoutl ] 4 q ® A list of the layout elements Ff", - "type": "Image" - }, - { - "element_id": "c2a2a4a054151d16820f38e115ce7a72", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 6 - }, - "text": "Fig. 
2: The relationship between the three types of layout data structures. Coordinate supports three kinds of variation; TextBlock consists of the co- ordinate information and extra features like block text, types, and reading orders; a Layout object is a list of all possible layout elements, including other Layout objects. They all support the same set of transformation and operation APIs for maximum flexibility.", - "type": "FigureCaption" - }, - { - "element_id": "d997f63fd79c7e03050ca01b58dfdf0a", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 6 - }, - "text": "Shown in Table 1, LayoutParser currently hosts 9 pre-trained models trained on 5 different datasets. Description of the training dataset is provided alongside with the trained models such that users can quickly identify the most suitable models for their tasks. 
Additionally, when such a model is not readily available, LayoutParser also supports training customized layout models and community sharing of the models (detailed in Section 3.5).", - "type": "NarrativeText" - }, - { - "element_id": "836e9227ef393d8b00369e6300fbba4c", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 6 - }, - "text": "3.2 Layout Data Structures", - "type": "Title" - }, - { - "element_id": "601f7d95172984c75de081023ca64c15", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 6 - }, - "text": "A critical feature of LayoutParser is the implementation of a series of data structures and operations that can be used to efficiently process and manipulate the layout elements. In document image analysis pipelines, various post-processing on the layout analysis model outputs is usually required to obtain the final outputs. Traditionally, this requires exporting DL model outputs and then loading the results into other pipelines. All model outputs from LayoutParser will be stored in carefully engineered data types optimized for further processing, which makes it possible to build an end-to-end document digitization pipeline within LayoutParser. There are three key components in the data structure, namely the Coordinate system, the TextBlock, and the Layout. 
They provide different levels of abstraction for the layout data, and a set of APIs are supported for transformations or operations on these classes.", - "type": "NarrativeText" - }, - { - "element_id": "48d58ed9a3d95637df68c8b810147ba1", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "LayoutParser: A Unified Toolkit for DL-Based DIA", - "type": "ListItem" - }, - { - "element_id": "dcdc0dc4759bd20c04026973cbe386e2", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "Coordinates are the cornerstones for storing layout information. Currently, three types of Coordinate data structures are provided in LayoutParser, shown in Figure 2. Interval and Rectangle are the most common data types and support specifying 1D or 2D regions within a document. They are parameterized with 2 and 4 parameters. A Quadrilateral class is also implemented to support a more generalized representation of rectangular regions when the document is skewed or distorted, where the 4 corner points can be specified and a total of 8 degrees of freedom are supported. A wide collection of transformations like shift, pad, and scale, and operations like intersect, union, and is_in, are supported for these classes. Notably, it is common to separate a segment of the image and analyze it individually. 
LayoutParser provides full support for this scenario via image cropping operations crop_image and coordinate transformations like relative_to and condition_on that transform coordinates to and from their relative representations. We refer readers to Table 2 for a more detailed description of these operations13.", - "type": "NarrativeText" - }, - { - "element_id": "3f620e1ad95cd446170613ed9d780853", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "Based on Coordinates, we implement the TextBlock class that stores both the positional and extra features of individual layout elements. It also supports specifying the reading orders via setting the parent field to the index of the parent object. A Layout class is built that takes in a list of TextBlocks and supports processing the elements in batch. Layout can also be nested to support hierarchical layout structures. 
They support the same operations and transformations as the Coordinate classes, minimizing both learning and deployment effort.", - "type": "NarrativeText" - }, - { - "element_id": "2b81bd7a3f21b84379bfcd4bb175c5d1", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "3.3 OCR", - "type": "Title" - }, - { - "element_id": "16565416942e53cf65f75a8a845df211", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "LayoutParser provides a unified interface for existing OCR tools. Though there are many OCR tools available, they are usually configured differently with distinct APIs or protocols for using them. It can be inefficient to add new OCR tools into an existing pipeline, and difficult to make direct comparisons among the available tools to find the best option for a particular project. To this end, LayoutParser builds a series of wrappers among existing OCR engines, and provides nearly the same syntax for using them. It supports a plug-and-play style of using OCR engines, making it effortless to switch, evaluate, and compare different OCR modules:", - "type": "NarrativeText" - }, - { - "element_id": "373a9a67f855ba5b79bdc1393d2f1ce9", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "1 ocr_agent = lp . 
TesseractAgent () 2 # Can be easily switched to other OCR software 3 tokens = ocr_agent . detect ( image )", - "type": "NarrativeText" - }, - { - "element_id": "5bc3c9470dc53c60c1fd04828105afdd", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "The OCR outputs will also be stored in the aforementioned layout data structures and can be seamlessly incorporated into the digitization pipeline. Currently LayoutParser supports the Tesseract and Google Cloud Vision OCR engines.", - "type": "NarrativeText" - }, - { - "element_id": "fa023ccf2ac1042ef254ecf47cc592ca", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "LayoutParser also comes with a DL-based CNN-RNN OCR model [6] trained with the Connectionist Temporal Classification (CTC) loss [10]. 
It can be used like the other OCR modules, and can be easily trained on customized datasets.", - "type": "NarrativeText" - }, - { - "element_id": "a2a0a2ef0279f0710f3cd34474ca8645", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "13 This is also available in the LayoutParser documentation pages.", - "type": "NarrativeText" - }, - { - "element_id": "5498a550b5367fa8dc935013956d09fa", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "8 Z. Shen et al.", - "type": "ListItem" - }, - { - "element_id": "a5ce184b53898a543bca90a5b0acd156", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "Table 2: All operations supported by the layout elements. 
The same APIs are supported across different layout element classes including Coordinate types, TextBlock and Layout.", - "type": "NarrativeText" - }, - { - "element_id": "64bc79d1132a89c71837f420d6e4e2dc", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8, - "table_as_cells": [ - { - "content": "block.pad(top, bottom,", - "h": 1, - "w": 1, - "x": 0, - "y": 0 - }, - { - "content": "block.scale(fx, fy)", - "h": 1, - "w": 1, - "x": 0, - "y": 1 - }, - { - "content": "block.shift(dx, dy)", - "h": 1, - "w": 1, - "x": 0, - "y": 2 - }, - { - "content": "block1.is_in(block2)", - "h": 1, - "w": 1, - "x": 0, - "y": 3 - }, - { - "content": "block1. intersect (block2)", - "h": 1, - "w": 1, - "x": 0, - "y": 4 - }, - { - "content": "block1.union(block2)", - "h": 1, - "w": 1, - "x": 0, - "y": 5 - }, - { - "content": "block1.relative_to(block2)", - "h": 1, - "w": 1, - "x": 0, - "y": 6 - }, - { - "content": "block1.condition_on(block2)", - "h": 1, - "w": 1, - "x": 0, - "y": 7 - }, - { - "content": "block. 
crop_image (image)", - "h": 1, - "w": 1, - "x": 0, - "y": 8 - }, - { - "content": "right,", - "h": 1, - "w": 1, - "x": 1, - "y": 0 - }, - { - "content": "", - "h": 1, - "w": 1, - "x": 1, - "y": 1 - }, - { - "content": "", - "h": 1, - "w": 1, - "x": 1, - "y": 2 - }, - { - "content": "", - "h": 1, - "w": 1, - "x": 1, - "y": 3 - }, - { - "content": "", - "h": 1, - "w": 1, - "x": 1, - "y": 4 - }, - { - "content": "", - "h": 1, - "w": 1, - "x": 1, - "y": 5 - }, - { - "content": "", - "h": 1, - "w": 1, - "x": 1, - "y": 6 - }, - { - "content": "", - "h": 1, - "w": 1, - "x": 1, - "y": 7 - }, - { - "content": "", - "h": 1, - "w": 1, - "x": 1, - "y": 8 - }, - { - "content": "left)", - "h": 1, - "w": 1, - "x": 2, - "y": 0 - }, - { - "content": "", - "h": 1, - "w": 1, - "x": 2, - "y": 1 - }, - { - "content": "", - "h": 1, - "w": 1, - "x": 2, - "y": 2 - }, - { - "content": "", - "h": 1, - "w": 1, - "x": 2, - "y": 3 - }, - { - "content": "", - "h": 1, - "w": 1, - "x": 2, - "y": 4 - }, - { - "content": "", - "h": 1, - "w": 1, - "x": 2, - "y": 5 - }, - { - "content": "", - "h": 1, - "w": 1, - "x": 2, - "y": 6 - }, - { - "content": "", - "h": 1, - "w": 1, - "x": 2, - "y": 7 - }, - { - "content": "", - "h": 1, - "w": 1, - "x": 2, - "y": 8 - }, - { - "content": "Enlarge the current block according to the input", - "h": 1, - "w": 1, - "x": 3, - "y": 0 - }, - { - "content": "Scale the current block given the ratio in x and y direction", - "h": 1, - "w": 1, - "x": 3, - "y": 1 - }, - { - "content": "Move the current block with the shift distances in x and y direction", - "h": 1, - "w": 1, - "x": 3, - "y": 2 - }, - { - "content": "Whether block] is inside of block2", - "h": 1, - "w": 1, - "x": 3, - "y": 3 - }, - { - "content": "Return the intersection region of blockl and block2. Coordinate type to be determined based on the inputs", - "h": 1, - "w": 1, - "x": 3, - "y": 4 - }, - { - "content": "Return the union region of blockl and block2. 
Coordinate type to be determined based on the inputs", - "h": 1, - "w": 1, - "x": 3, - "y": 5 - }, - { - "content": "Convert the absolute coordinates of block to relative coordinates to block2", - "h": 1, - "w": 1, - "x": 3, - "y": 6 - }, - { - "content": "Calculate the absolute coordinates of blockl given the canvas block2’s absolute coordinates", - "h": 1, - "w": 1, - "x": 3, - "y": 7 - }, - { - "content": "Obtain the image segments in the block region", - "h": 1, - "w": 1, - "x": 3, - "y": 8 - } - ], - "text_as_html": "
block.pad(top, bottom,right,left)Enlarge the current block according to the input
block.scale(fx, fy)Scale the current block given the ratio in x and y direction
block.shift(dx, dy)Move the current block with the shift distances in x and y direction
block1.is_in(block2)Whether block] is inside of block2
block1. intersect (block2)Return the intersection region of blockl and block2. Coordinate type to be determined based on the inputs
block1.union(block2)Return the union region of blockl and block2. Coordinate type to be determined based on the inputs
block1.relative_to(block2)Convert the absolute coordinates of block to relative coordinates to block2
block1.condition_on(block2)Calculate the absolute coordinates of blockl given the canvas block2’s absolute coordinates
block. crop_image (image)Obtain the image segments in the block region
" - }, - "text": "Operation Name Description block.pad(top, bottom, right, left) Enlarge the current block according to the input block.scale(fx, fy) Scale the current block given the ratio in x and y direction block.shift(dx, dy) Move the current block with the shift distances in x and y direction block1.is in(block2) Whether block1 is inside of block2 block1.intersect(block2) Return the intersection region of block1 and block2. Coordinate type to be determined based on the inputs. block1.union(block2) Return the union region of block1 and block2. Coordinate type to be determined based on the inputs. block1.relative to(block2) Convert the absolute coordinates of block1 to relative coordinates to block2 block1.condition on(block2) Calculate the absolute coordinates of block1 given the canvas block2’s absolute coordinates block.crop image(image) Obtain the image segments in the block region", - "type": "Table" - }, - { - "element_id": "3d803300353e1055611e38b1b2eb19a7", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "3.4 Storage and visualization", - "type": "Title" - }, - { - "element_id": "97ec9aa9ca6081a954acf13151c9239e", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "The end goal of DIA is to transform the image-based document data into a structured database. LayoutParser supports exporting layout data into different formats like JSON, csv, and will add the support for the METS/ALTO XML format 14 . 
It can also load datasets from layout analysis-specific formats like COCO [38] and the Page Format [25] for training layout models (Section 3.5). Visualization of the layout detection results is critical for both presentation and debugging. LayoutParser is built with an integrated API for displaying the layout information along with the original document image. Shown in Figure 3, it enables presenting layout data with rich meta information and features in different modes. More detailed information can be found in the online LayoutParser documentation page.", - "type": "NarrativeText" - }, - { - "element_id": "b3c9f96506599f418cc474db4adb5b0d", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "3.5 Customized Model Training", - "type": "Title" - }, - { - "element_id": "1a011956c708d01abb2c058ec28c126f", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "Besides the off-the-shelf library, LayoutParser is also highly customizable with supports for highly unique and challenging document analysis tasks. Target document images can be vastly different from the existing datasets for train- ing layout models, which leads to low layout detection accuracy. 
Training data", - "type": "NarrativeText" - }, - { - "element_id": "05f6e98c538d1912459b1e568871e6c2", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "14 https://altoxml.github.io", - "type": "NarrativeText" - }, - { - "element_id": "c069937e6c2bfc0f856835f3af4d6181", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 9 - }, - "text": "LayoutParser: A Unified Toolkit for DL-Based DIA", - "type": "ListItem" - }, - { - "element_id": "f5450580bd9ae07f4cdf7c23a6ccaf41", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 9 - }, - "text": "x09 Burpunog uayor Aeydsiq 1 vondo 10g Guypunog usyoy apir:z uondo Mode I: Showing Layout on the Original Image Mode Il: Drawing OCR'd Text at the Correspoding Position", - "type": "Image" - }, - { - "element_id": "fadd4ad54cd14e3e4711d41a1c99f813", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 9 - }, - "text": "Fig. 3: Layout detection and OCR results visualization generated by the LayoutParser APIs. 
Mode I directly overlays the layout region bounding boxes and categories over the original image. Mode II recreates the original document via drawing the OCR’d texts at their corresponding positions on the image canvas. In this figure, tokens in textual regions are filtered using the API and then displayed.", - "type": "NarrativeText" - }, - { - "element_id": "625c9e1d41a9740f094041595f79953d", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 9 - }, - "text": "can also be highly sensitive and not sharable publicly. To overcome these chal- lenges, LayoutParser is built with rich features for efficient data annotation and customized model training.", - "type": "NarrativeText" - }, - { - "element_id": "a3498730b5cd3fe9405fad69bcf37882", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 9 - }, - "text": "LayoutParser incorporates a toolkit optimized for annotating document lay- outs using object-level active learning [32]. With the help from a layout detection model trained along with labeling, only the most important layout objects within each image, rather than the whole image, are required for labeling. The rest of the regions are automatically annotated with high confidence predictions from the layout detection model. 
This allows a layout dataset to be created more efficiently with only around 60% of the labeling budget.", - "type": "NarrativeText" - }, - { - "element_id": "c4ccf2cf2e7495668221cbe51534f90b", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 9 - }, - "text": "After the training dataset is curated, LayoutParser supports different modes for training the layout models. Fine-tuning can be used for training models on a small newly-labeled dataset by initializing the model with existing pre-trained weights. Training from scratch can be helpful when the source dataset and target are significantly different and a large training set is available. However, as suggested in Studer et al.’s work[33], loading pre-trained weights on large-scale datasets like ImageNet [5], even from totally different domains, can still boost model performance. Through the integrated API provided by LayoutParser, users can easily compare model performances on the benchmark datasets.", - "type": "NarrativeText" - }, - { - "element_id": "59c95b02b488f297417af4125e4ac316", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 10 - }, - "text": "10 Z. 
Shen et al.", - "type": "ListItem" - }, - { - "element_id": "6eb34afad9d568fbccde8ac8854dc24d", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 10 - }, - "text": "Intra-column reading order Token Categories tie (Adress 2) tee (NE sumber Variable Column reading order HEE company type Column Categories (J tite Adress _] ree [7] Section Header Maximum Allowed Height (b) Illustration of the recreated document with dense text structure for better OCR performance", - "type": "Image" - }, - { - "element_id": "ebbb8c84b2a69f817c8ae7df20d72dd9", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 10 - }, - "text": "Fig. 4: Illustration of (a) the original historical Japanese document with layout detection results and (b) a recreated version of the document image that achieves much better character recognition recall. 
The reorganization algorithm rearranges the tokens based on the their detected bounding boxes given a maximum allowed height.", - "type": "NarrativeText" - }, - { - "element_id": "88f6e589165656eceebf898d0240e05c", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 10 - }, - "text": "4 LayoutParser Community Platform", - "type": "Title" - }, - { - "element_id": "e9a86eb57ba5483acfeefb0e931402b1", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 10 - }, - "text": "Another focus of LayoutParser is promoting the reusability of layout detection models and full digitization pipelines. Similar to many existing deep learning libraries, LayoutParser comes with a community model hub for distributing layout models. End-users can upload their self-trained models to the model hub, and these models can be loaded into a similar interface as the currently available LayoutParser pre-trained models. For example, the model trained on the News Navigator dataset [17] has been incorporated in the model hub.", - "type": "NarrativeText" - }, - { - "element_id": "c08c76705396fe7a65be5dff6d3bffd5", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 10 - }, - "text": "Beyond DL models, LayoutParser also promotes the sharing of entire doc- ument digitization pipelines. 
For example, sometimes the pipeline requires the combination of multiple DL models to achieve better accuracy. Currently, pipelines are mainly described in academic papers and implementations are often not pub- licly available. To this end, the LayoutParser community platform also enables the sharing of layout pipelines to promote the discussion and reuse of techniques. For each shared pipeline, it has a dedicated project page, with links to the source code, documentation, and an outline of the approaches. A discussion panel is provided for exchanging ideas. Combined with the core LayoutParser library, users can easily build reusable components based on the shared pipelines and apply them to solve their unique problems.", - "type": "NarrativeText" - }, - { - "element_id": "53da8301ac140e0b72cdcf6a7f405918", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 10 - }, - "text": "5 Use Cases", - "type": "Title" - }, - { - "element_id": "1fd6bf73b6c80f8ed034bf977fba5a67", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 10 - }, - "text": "The core objective of LayoutParser is to make it easier to create both large-scale and light-weight document digitization pipelines. 
Large-scale document processing", - "type": "NarrativeText" - }, - { - "element_id": "ab543398222da25b3a9231929162d3a0", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "LayoutParser: A Unified Toolkit for DL-Based DIA", - "type": "ListItem" - }, - { - "element_id": "4b9eddb71426681f2828832312457b67", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "focuses on precision, efficiency, and robustness. The target documents may have complicated structures, and may require training multiple layout detection models to achieve the optimal accuracy. Light-weight pipelines are built for relatively simple documents, with an emphasis on development ease, speed and flexibility. Ideally one only needs to use existing resources, and model training should be avoided. Through two exemplar projects, we show how practitioners in both academia and industry can easily build such pipelines using LayoutParser and extract high-quality structured document data for their downstream tasks. 
The source code for these projects will be publicly available in the LayoutParser community hub.", - "type": "NarrativeText" - }, - { - "element_id": "54ee49eac3f4e6098811cda1f9dd0306", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "5.1 A Comprehensive Historical Document Digitization Pipeline", - "type": "Title" - }, - { - "element_id": "07a7dcb89bd4b78209da5f28e2877a1a", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "The digitization of historical documents can unlock valuable data that can shed light on many important social, economic, and historical questions. Yet due to scan noises, page wearing, and the prevalence of complicated layout structures, ob- taining a structured representation of historical document scans is often extremely complicated. In this example, LayoutParser was used to develop a comprehensive pipeline, shown in Figure 5, to gener- ate high-quality structured data from historical Japanese firm financial ta- bles with complicated layouts. 
The pipeline applies two layout models to identify different levels of document structures and two customized OCR engines for optimized character recog- nition accuracy.", - "type": "NarrativeText" - }, - { - "element_id": "02890858b0555e354a0336addbf54a7e", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "(spe peepee, ‘Active Learning Layout Annotate Layout Dataset | + ‘Annotation Toolkit ¥ a Deep Leaming Layout Model Training & Inference, ¥ ; Handy Data Structures & Post-processing El Apis for Layout Det a LAR ror tye eats) 4 Text Recognition | <—— Default ane Customized ¥ ee Layout Structure Visualization & Export | <—— | visualization & Storage The Japanese Document Helpful LayoutParser Digitization Pipeline Modules", - "type": "Image" - }, - { - "element_id": "42551c9b40827dcdc52055b4d25c6fc3", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "As shown in Figure 4 (a), the document contains columns of text written vertically 15, a common style in Japanese. Due to scanning noise and archaic printing technology, the columns can be skewed or have vari- able widths, and hence cannot be eas- ily identified via rule-based methods. 
Within each column, words are sepa- rated by white spaces of variable size, and the vertical positions of objects can be an indicator of their layout type.", - "type": "NarrativeText" - }, - { - "element_id": "e55432edc0ceae5fae7bedae3bd560c6", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "Fig. 5: Illustration of how LayoutParser helps with the historical document digi- tization pipeline.", - "type": "NarrativeText" - }, - { - "element_id": "17e31eed33c4abb58af55aa5c6e0b7b0", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "15 A document page consists of eight rows like this. For simplicity we skip the row segmentation discussion and refer readers to the source code when available.", - "type": "ListItem" - }, - { - "element_id": "9d917f215b0115c679105482b80d2d2d", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 12 - }, - "text": "12 Z. 
Shen et al.", - "type": "Title" - }, - { - "element_id": "4bba0fe5b17811e76afbf7650f2f6792", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 12 - }, - "text": "To decipher the complicated layout", - "type": "NarrativeText" - }, - { - "element_id": "7e1f7b138c864ed8b40cf0f3d38801ec", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 12 - }, - "text": "structure, two object detection models have been trained to recognize individual columns and tokens, respectively. A small training set (400 images with approxi- mately 100 annotations each) is curated via the active learning based annotation tool [32] in LayoutParser. The models learn to identify both the categories and regions for each token or column via their distinct visual features. The layout data structure enables easy grouping of the tokens within each column, and rearranging columns to achieve the correct reading orders based on the horizontal position. Errors are identified and rectified via checking the consistency of the model predictions. 
Therefore, though trained on a small dataset, the pipeline achieves a high level of layout detection accuracy: it achieves a 96.97 AP [19] score across 5 categories for the column detection model, and a 89.23 AP across 4 categories for the token detection model.", - "type": "NarrativeText" - }, - { - "element_id": "dccaa93e7bae24dedf523dd39575dfbe", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 12 - }, - "text": "A combination of character recognition methods is developed to tackle the unique challenges in this document. In our experiments, we found that irregular spacing between the tokens led to a low character recognition recall rate, whereas existing OCR models tend to perform better on densely-arranged texts. To overcome this challenge, we create a document reorganization algorithm that rearranges the text based on the token bounding boxes detected in the layout analysis step. Figure 4 (b) illustrates the generated image of dense text, which is sent to the OCR APIs as a whole to reduce the transaction costs. The flexible coordinate system in LayoutParser is used to transform the OCR results relative to their original positions on the page.", - "type": "NarrativeText" - }, - { - "element_id": "60c2e2147d0b0dbd576d51b71a95a2ef", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 12 - }, - "text": "Additionally, it is common for historical documents to use unique fonts with different glyphs, which significantly degrades the accuracy of OCR models trained on modern texts. 
In this document, a special flat font is used for printing numbers and could not be detected by off-the-shelf OCR engines. Using the highly flexible functionalities from LayoutParser, a pipeline approach is constructed that achieves a high recognition accuracy with minimal effort. As the characters have unique visual structures and are usually clustered together, we train the layout model to identify number regions with a dedicated category. Subsequently, LayoutParser crops images within these regions, and identifies characters within them using a self-trained OCR model based on a CNN-RNN [6]. The model detects a total of 15 possible categories, and achieves a 0.98 Jaccard score16 and a 0.17 average Levinstein distances17 for token prediction on the test set.", - "type": "NarrativeText" - }, - { - "element_id": "de9e855638523c5f77ed4070813e37a3", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 12 - }, - "text": "Overall, it is possible to create an intricate and highly accurate digitization pipeline for large-scale digitization using LayoutParser. The pipeline avoids specifying the complicated rules used in traditional methods, is straightforward to develop, and is robust to outliers. 
The DL models also generate fine-grained results that enable creative approaches like page reorganization for OCR.", - "type": "NarrativeText" - }, - { - "element_id": "a375a901ba62c168a96725c055b47bad", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 12 - }, - "text": "16 This measures the overlap between the detected and ground-truth characters, and the maximum is 1.", - "type": "ListItem" - }, - { - "element_id": "184a3abfd34e7aa04632979ee3c2de36", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 12 - }, - "text": "17 This measures the number of edits from the ground-truth text to the predicted text, and lower is better.", - "type": "ListItem" - }, - { - "element_id": "2b7101f39954d5301166b82906202ea9", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 13 - }, - "text": "LayoutParser: A Unified Toolkit for DL-Based DIA", - "type": "ListItem" - }, - { - "element_id": "d5c954ff619e348d36d5180feedabc6c", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 13 - }, - "text": "(@) Partial table at the bottom (&) Full page 
table (6) Partial table at the top (d) Mis-detected tet line", - "type": "Image" - }, - { - "element_id": "7e685908875164adafa447ec3d97455e", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 13 - }, - "text": "Fig. 6: This lightweight table detector can identify tables (outlined in red) and cells (shaded in blue) in different locations on a page. In very few cases (d), it might generate minor error predictions, e.g, failing to capture the top text line of a table.", - "type": "FigureCaption" - }, - { - "element_id": "60e4fa05c78628ec1c6fa6003b86b52e", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 13 - }, - "text": "5.2 A light-weight Visual Table Extractor", - "type": "Title" - }, - { - "element_id": "445ad333fa3f7f85d2be634fbdeeb72a", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 13 - }, - "text": "Detecting tables and parsing their structures (table extraction) are of central im- portance for many document digitization tasks. Many previous works [26, 30, 27] and tools 18 have been developed to identify and parse table structures. Yet they might require training complicated models from scratch, or are only applicable for born-digital PDF documents. 
In this section, we show how LayoutParser can help build a light-weight accurate visual table extractor for legal docket tables using the existing resources with minimal effort.", - "type": "NarrativeText" - }, - { - "element_id": "923b62eb8550ec49cf6d3f2e6bac7ec8", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 13 - }, - "text": "The extractor uses a pre-trained layout detection model for identifying the table regions and some simple rules for pairing the rows and the columns in the PDF image. Mask R-CNN [12] trained on the PubLayNet dataset [38] from the LayoutParser Model Zoo can be used for detecting table regions. By filtering out model predictions of low confidence and removing overlapping predictions, LayoutParser can identify the tabular regions on each page, which significantly simplifies the subsequent steps. By applying the line detection functions within the tabular segments, provided in the utility module from LayoutParser, the pipeline can identify the three distinct columns in the tables. A row clustering method is then applied via analyzing the y coordinates of token bounding boxes in the left-most column, which are obtained from the OCR engines. A non-maximal suppression algorithm is used to remove duplicated rows with extremely small gaps. Shown in Figure 6, the built pipeline can detect tables at different positions on a page accurately. 
Continued tables from different pages are concatenated, and a structured table representation has been easily created.", - "type": "NarrativeText" - }, - { - "element_id": "abb8837ab23e7fcaa35969b0119bcc2e", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 13 - }, - "text": "18 https://github.com/atlanhq/camelot, https://github.com/tabulapdf/tabula", - "type": "NarrativeText" - }, - { - "element_id": "cf4d8c7a6177bda0ced6458173952021", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 14 - }, - "text": "14 Z. Shen et al.", - "type": "ListItem" - }, - { - "element_id": "030602b79a8995e9a457b875d94c016d", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 14 - }, - "text": "6 Conclusion", - "type": "Title" - }, - { - "element_id": "e79cef57c86050aa5fc74e5cd3923197", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 14 - }, - "text": "LayoutParser provides a comprehensive toolkit for deep learning-based document image analysis. 
The off-the-shelf library is easy to install, and can be used to build flexible and accurate pipelines for processing documents with complicated structures. It also supports high-level customization and enables easy labeling and training of DL models on unique document image datasets. The LayoutParser community platform facilitates sharing DL models and DIA pipelines, inviting discussion and promoting code reproducibility and reusability. The LayoutParser team is committed to keeping the library updated continuously and bringing the most recent advances in DL-based DIA, such as multi-modal document modeling [37, 36, 9] (an upcoming priority), to a diverse audience of end-users.", - "type": "NarrativeText" - }, - { - "element_id": "dd3013dfc8b3df79d89268894b10571e", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 14 - }, - "text": "Acknowledgements We thank the anonymous reviewers for their comments and suggestions. This project is supported in part by NSF Grant OIA-2033558 and funding from the Harvard Data Science Initiative and Harvard Catalyst. 
Zejiang Shen thanks Doug Downey for suggestions.", - "type": "NarrativeText" - }, - { - "element_id": "58c1b97c7988d78495527a0726a7c85a", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 14 - }, - "text": "References", - "type": "Title" - }, - { - "element_id": "85e09a5617e58a3a78b22fd12eb29eaf", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 14 - }, - "text": "[1] Abadi, M., Agarwal, A., Barham, P., Brevdo, E., Chen, Z., Citro, C., Corrado, G.S., Davis, A., Dean, J., Devin, M., Ghemawat, S., Goodfellow, I., Harp, A., Irving, G., Isard, M., Jia, Y., Jozefowicz, R., Kaiser, L., Kudlur, M., Levenberg, J., Man´e, D., Monga, R., Moore, S., Murray, D., Olah, C., Schuster, M., Shlens, J., Steiner, B., Sutskever, I., Talwar, K., Tucker, P., Vanhoucke, V., Vasudevan, V., Vi´egas, F., Vinyals, O., Warden, P., Wattenberg, M., Wicke, M., Yu, Y., Zheng, X.: TensorFlow: Large-scale machine learning on heterogeneous systems (2015), https://www.tensorflow.org/, software available from tensorflow.org", - "type": "ListItem" - }, - { - "element_id": "ad466edc2a12c9be4bf951fd8b5bf818", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 14 - }, - "text": "[2] Alberti, M., Pondenkandath, V., W¨ursch, M., Ingold, R., Liwicki, M.: Deepdiva: a highly-functional python framework 
for reproducible experiments. In: 2018 16th International Conference on Frontiers in Handwriting Recognition (ICFHR). pp. 423–428. IEEE (2018)", - "type": "ListItem" - }, - { - "element_id": "217777f3d44620afddc1e27553e81a66", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 14 - }, - "text": "[3] Antonacopoulos, A., Bridson, D., Papadopoulos, C., Pletschacher, S.: A realistic dataset for performance evaluation of document layout analysis. In: 2009 10th International Conference on Document Analysis and Recognition. pp. 296–300. IEEE (2009)", - "type": "ListItem" - }, - { - "element_id": "292dd088dc6a174159395e31be7755d7", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 14 - }, - "text": "[4] Baek, Y., Lee, B., Han, D., Yun, S., Lee, H.: Character region awareness for text detection. In: Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition. pp. 9365–9374 (2019)", - "type": "ListItem" - }, - { - "element_id": "52a5a7f582c381ec8c7c1abc7e983191", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 14 - }, - "text": "[5] Deng, J., Dong, W., Socher, R., Li, L.J., Li, K., Fei-Fei, L.: ImageNet: A Large-Scale Hierarchical Image Database. 
In: CVPR09 (2009)", - "type": "ListItem" - }, - { - "element_id": "4e93c51c89970349aa9e0a42cb330c4b", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 14 - }, - "text": "[6] Deng, Y., Kanervisto, A., Ling, J., Rush, A.M.: Image-to-markup generation with coarse-to-fine attention. In: International Conference on Machine Learning. pp. 980–989. PMLR (2017)", - "type": "ListItem" - }, - { - "element_id": "8cfd166d282469f765423faae44271e2", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 14 - }, - "text": "[7] Ganin, Y., Lempitsky, V.: Unsupervised domain adaptation by backpropagation. In: International conference on machine learning. pp. 1180–1189. 
PMLR (2015)", - "type": "ListItem" - }, - { - "element_id": "8bce49aab693aad97676011688f3f6f3", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 15 - }, - "text": "LayoutParser: A Unified Toolkit for DL-Based DIA", - "type": "ListItem" - }, - { - "element_id": "8605ad66ac8429ae6e92841d5026f0de", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 15 - }, - "text": "Gardner, M., Grus, J., Neumann, M., Tafjord, O., Dasigi, P., Liu, N., Peters, M., Schmitz, M., Zettlemoyer, L.: Allennlp: A deep semantic natural language processing platform. arXiv preprint arXiv:1803.07640 (2018) Lukasz Garncarek, Powalski, R., Stanistawek, T., Topolski, B., Halama, P., Graliriski, F.: Lambert: Layout-aware (language) modeling using bert for in- formation extraction (2020)", - "type": "NarrativeText" - }, - { - "element_id": "39972987462975e72ff97f3cc3d28223", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 15 - }, - "text": "[10] Graves, A., Fern´andez, S., Gomez, F., Schmidhuber, J.: Connectionist temporal classification: labelling unsegmented sequence data with recurrent neural networks. In: Proceedings of the 23rd international conference on Machine learning. pp. 
369–376 (2006)", - "type": "ListItem" - }, - { - "element_id": "559ea792f7c0c98e4af9e3436774efa9", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 15 - }, - "text": "[11] Harley, A.W., Ufkes, A., Derpanis, K.G.: Evaluation of deep convolutional nets for document image classification and retrieval. In: 2015 13th International Conference on Document Analysis and Recognition (ICDAR). pp. 991–995. IEEE (2015) [12] He, K., Gkioxari, G., Doll´ar, P., Girshick, R.: Mask r-cnn. In: Proceedings of the", - "type": "NarrativeText" - }, - { - "element_id": "a25accb47954c56b35a06609449901ef", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 15 - }, - "text": "IEEE international conference on computer vision. pp. 2961–2969 (2017)", - "type": "ListItem" - }, - { - "element_id": "616320116770187bb631e2bcabdc44fe", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 15 - }, - "text": "[13] He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition. pp. 
770–778 (2016)", - "type": "ListItem" - }, - { - "element_id": "8ead02f7720d59492ca67a5cfddd4552", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 15 - }, - "text": "[14] Kay, A.: Tesseract: An open-source optical character recognition engine. Linux J. 2007(159), 2 (Jul 2007)", - "type": "ListItem" - }, - { - "element_id": "ccf2aef698df297baac645bfbe87b5a4", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 15 - }, - "text": "[15] Lamiroy, B., Lopresti, D.: An open architecture for end-to-end document analysis benchmarking. In: 2011 International Conference on Document Analysis and Recognition. pp. 42–47. IEEE (2011)", - "type": "ListItem" - }, - { - "element_id": "7303875a4141fe55ab6c6538d2660269", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 15 - }, - "text": "[16] Lee, B.C., Weld, D.S.: Newspaper navigator: Open faceted search for 1.5 million images. In: Adjunct Publication of the 33rd Annual ACM Sym- posium on User Interface Software and Technology. p. 120–122. UIST ’20 Adjunct, Association for Computing Machinery, New York, NY, USA (2020). https://doi.org/10.1145/3379350.3416143, https://doi-org.offcampus. 
lib.washington.edu/10.1145/3379350.3416143", - "type": "ListItem" - }, - { - "element_id": "484bdc79ca505343715e3d177bd17275", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 15 - }, - "text": "[17] Lee, B.C.G., Mears, J., Jakeway, E., Ferriter, M., Adams, C., Yarasavage, N., Thomas, D., Zwaard, K., Weld, D.S.: The Newspaper Navigator Dataset: Extracting Headlines and Visual Content from 16 Million Historic Newspaper Pages in Chronicling America, p. 3055–3062. Association for Computing Machinery, New York, NY, USA (2020), https://doi.org/10.1145/3340531.3412767", - "type": "ListItem" - }, - { - "element_id": "d3a921d79a30615dcf174c93d2da8d4d", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 15 - }, - "text": "[18] Li, M., Cui, L., Huang, S., Wei, F., Zhou, M., Li, Z.: Tablebank: Table benchmark for image-based table detection and recognition. arXiv preprint arXiv:1903.01949 (2019)", - "type": "ListItem" - }, - { - "element_id": "8eea8c964496b9e3de3099a9af798467", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 15 - }, - "text": "[19] Lin, T.Y., Maire, M., Belongie, S., Hays, J., Perona, P., Ramanan, D., Doll´ar, P., Zitnick, C.L.: Microsoft coco: Common objects in context. In: European conference on computer vision. pp. 740–755. 
Springer (2014)", - "type": "ListItem" - }, - { - "element_id": "de8aee29b21c13139f4875a90a52d0a0", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 15 - }, - "text": "[20] Long, J., Shelhamer, E., Darrell, T.: Fully convolutional networks for semantic segmentation. In: Proceedings of the IEEE conference on computer vision and pattern recognition. pp. 3431–3440 (2015)", - "type": "ListItem" - }, - { - "element_id": "bce47bb8dec257c966d948be79e80094", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 15 - }, - "text": "[21] Neudecker, C., Schlarb, S., Dogan, Z.M., Missier, P., Sufi, S., Williams, A., Wolsten- croft, K.: An experimental workflow development platform for historical document digitisation and analysis. In: Proceedings of the 2011 workshop on historical document imaging and processing. pp. 161–168 (2011)", - "type": "ListItem" - }, - { - "element_id": "7a372cbcf79efc9cc23d35644816ca15", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 15 - }, - "text": "[22] Oliveira, S.A., Seguin, B., Kaplan, F.: dhsegment: A generic deep-learning approach for document segmentation. In: 2018 16th International Conference on Frontiers in Handwriting Recognition (ICFHR). pp. 7–12. 
IEEE (2018)", - "type": "ListItem" - }, - { - "element_id": "2125d675311c00e01a24886e8a603ee1", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 16 - }, - "text": "16 Z. Shen et al.", - "type": "ListItem" - }, - { - "element_id": "e5e88c91dcc8703ef7ffaf69fe565020", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 16 - }, - "text": "[23] Paszke, A., Gross, S., Chintala, S., Chanan, G., Yang, E., DeVito, Z., Lin, Z., Desmaison, A., Antiga, L., Lerer, A.: Automatic differentiation in pytorch (2017) [24] Paszke, A., Gross, S., Massa, F., Lerer, A., Bradbury, J., Chanan, G., Killeen, T., Lin, Z., Gimelshein, N., Antiga, L., et al.: Pytorch: An imperative style, high-performance deep learning library. arXiv preprint arXiv:1912.01703 (2019) [25] Pletschacher, S., Antonacopoulos, A.: The page (page analysis and ground-truth elements) format framework. In: 2010 20th International Conference on Pattern Recognition. pp. 257–260. 
IEEE (2010)", - "type": "NarrativeText" - }, - { - "element_id": "a647b5ee9dfd11735b912b0510f476a1", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 16 - }, - "text": "[26] Prasad, D., Gadpal, A., Kapadni, K., Visave, M., Sultanpure, K.: Cascadetabnet: An approach for end to end table detection and structure recognition from image- based documents. In: Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition Workshops. pp. 572–573 (2020)", - "type": "ListItem" - }, - { - "element_id": "70a42a501297733d90dbcae55dbc2b78", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 16 - }, - "text": "[27] Qasim, S.R., Mahmood, H., Shafait, F.: Rethinking table recognition using graph neural networks. In: 2019 International Conference on Document Analysis and Recognition (ICDAR). pp. 142–147. IEEE (2019)", - "type": "ListItem" - }, - { - "element_id": "3d9af66828b6b1e385e04dcad340e403", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 16 - }, - "text": "[28] Ren, S., He, K., Girshick, R., Sun, J.: Faster r-cnn: Towards real-time object detection with region proposal networks. In: Advances in neural information processing systems. pp. 
91–99 (2015)", - "type": "ListItem" - }, - { - "element_id": "ff7c339e3258376076b2f515c6b0f01e", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 16 - }, - "text": "[29] Scarselli, F., Gori, M., Tsoi, A.C., Hagenbuchner, M., Monfardini, G.: The graph neural network model. IEEE transactions on neural networks 20(1), 61–80 (2008) [30] Schreiber, S., Agne, S., Wolf, I., Dengel, A., Ahmed, S.: Deepdesrt: Deep learning for detection and structure recognition of tables in document images. In: 2017 14th IAPR international conference on document analysis and recognition (ICDAR). vol. 1, pp. 1162–1167. IEEE (2017)", - "type": "NarrativeText" - }, - { - "element_id": "410d64198e29b695d48db2cd3781daae", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 16 - }, - "text": "[31] Shen, Z., Zhang, K., Dell, M.: A large dataset of historical japanese documents with complex layouts. In: Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition Workshops. pp. 548–549 (2020)", - "type": "ListItem" - }, - { - "element_id": "fc8457575ed11e22f45c936aba277303", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 16 - }, - "text": "[32] Shen, Z., Zhao, J., Dell, M., Yu, Y., Li, W.: Olala: Object-level active learning based layout annotation. 
arXiv preprint arXiv:2010.01762 (2020)", - "type": "ListItem" - }, - { - "element_id": "b66f47222b34c59b619b0f90b165b093", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 16 - }, - "text": "[33] Studer, L., Alberti, M., Pondenkandath, V., Goktepe, P., Kolonko, T., Fischer, A., Liwicki, M., Ingold, R.: A comprehensive study of imagenet pre-training for historical document image analysis. In: 2019 International Conference on Document Analysis and Recognition (ICDAR). pp. 720–725. IEEE (2019)", - "type": "ListItem" - }, - { - "element_id": "93eb7c029c0a6d8353aba82f5f2d389d", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 16 - }, - "text": "[34] Wolf, T., Debut, L., Sanh, V., Chaumond, J., Delangue, C., Moi, A., Cistac, P., Rault, T., Louf, R., Funtowicz, M., et al.: Huggingface’s transformers: State-of- the-art natural language processing. arXiv preprint arXiv:1910.03771 (2019) [35] Wu, Y., Kirillov, A., Massa, F., Lo, W.Y., Girshick, R.: Detectron2. 
https://", - "type": "NarrativeText" - }, - { - "element_id": "ba70589bb3f48ccf6e18724702cc1f10", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 16 - }, - "text": "github.com/facebookresearch/detectron2 (2019)", - "type": "ListItem" - }, - { - "element_id": "a8ce4311d30f1f7cba9043e30c9ad6d1", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 16 - }, - "text": "[36] Xu, Y., Xu, Y., Lv, T., Cui, L., Wei, F., Wang, G., Lu, Y., Florencio, D., Zhang, C., Che, W., et al.: Layoutlmv2: Multi-modal pre-training for visually-rich document understanding. 
arXiv preprint arXiv:2012.14740 (2020)", - "type": "ListItem" - }, - { - "element_id": "a9acaa0d527f89ed3f3c7daac7694a23", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 16 - }, - "text": "[37] Xu, Y., Li, M., Cui, L., Huang, S., Wei, F., Zhou, M.: Layoutlm: Pre-training of text and layout for document image understanding (2019)", - "type": "ListItem" - }, - { - "element_id": "b0e2d232fd257ee8ca691ff77b74fcee", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 16 - }, - "text": "[38] Zhong, X., Tang, J., Yepes, A.J.: Publaynet: largest dataset ever for doc- In: 2019 International Conference on Document IEEE (Sep 2019). ument Analysis and Recognition (ICDAR). pp. 1015–1022. 
https://doi.org/10.1109/ICDAR.2019.00166 layout analysis.", - "type": "ListItem" - } -] \ No newline at end of file diff --git a/test_unstructured_ingest/expected-structured-output/local-single-file/UDHR_first_article_all.json b/test_unstructured_ingest/expected-structured-output/local-single-file/UDHR_first_article_all.json new file mode 100644 index 0000000000..50af66d6be --- /dev/null +++ b/test_unstructured_ingest/expected-structured-output/local-single-file/UDHR_first_article_all.json @@ -0,0 +1,11350 @@ +[ + { + "type": "Title", + "element_id": "f604d94aa69cf0073e13e3be1dced533", + "text": "Universal Declaration of Human Right - First article, all languages", + "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "UncategorizedText", + "element_id": "f84bbc479d5bebf6b98c016e14d666d1", + "text": "\u00a9 1996 \u2013 2009 The Office of the High Commissioner for Human Rights", + "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "1453c80530ef11712374570a086dbd64", + "text": "This plain text version prepared by the \"UDHR in Unicode\" project, https://www.unicode.org/udhr.", + "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": 
"a2984ec4b6a45274d85da727619ff365", + "text": "------ (Bizisa) Novdiex nongv liex hufniv dav zer nier, zunxyanr niex qianrlir garhaf hufniv dav zer nier. Gixzex livxinf niex lianrxinx xief, xiongxdif guanxxif nier jinxsenr gof dav duifdaif dor.", + "metadata": { + "languages": [ + "deu", + "fra" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "8088ad87817694565cef5de84a691c31", + "text": "(Jinan)", + "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "84ce1bd66b09ce990ee385a04144822e", + "text": "\u662f\u4eba\u90fd\u751f\u800c\u81ea\u7531\uff0c\u5728\u5c0a\u4e25\u548c\u6743\u5229\u4e0a\u4e00\u5f8b\u5e73\u7b49\u3002\u4ed6\u4eec\u8d4b\u6709\u7406\u6027\u548c\u826f\u5fc3\uff0c\u5e76\u5e94\u4ee5\u5f1f\u5144\u5173\u7cfb\u7684\u7cbe\u795e\u76f8\u5bf9\u5f85\u3002", + "metadata": { + "languages": [ + "zho" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "917277f019ba0320fe8bada59c1460ce", + "text": "(Klau) Chix zox key zifyour, an hu tsunxyanr thungs chianrlif nu phinrten. 
Tsoxnur nes lishinf thungs leyx o, laiv kuanxshif to tseyr ti cinxsenr shiangxtaif.", + "metadata": { + "languages": [ + "eng", + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "d80b950467968341968cda0842fa7086", + "text": "(Maiunan) Renr rangf lyeuf xif zifyaot, yur zunxyant ndams chwentlif xif pingtdengl. Ter mev lilsingf ndams lyangtsinx, zingxsent gwanxsif vaif nungf.", + "metadata": { + "languages": [ + "deu", + "eng", + "cym", + "ind" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "99faf6e85f4e7e4c4588f87cbaa7598c", + "text": "(Mijisa) Novzeu nongv lie kylix dav zeu xi, zunyan nie qianlif gahaf kylix dav zeu xi. Gyxzeu livxinf nie lixtolo ca xie, xiongdif guanxif ai jinsen go duifdaif do.", + "metadata": { + "languages": [ + "cat", + "fra", + "ron", + "som" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "8767af986f37d38b27b0acec306a3a9e", + "text": "(Minjiang, spoken) Renren sen xialai de si ziyou li, zai zunlian ho quanli sang yelue pinden. 
Tamen fuyou lixin ho liangxin, hai yingai na xiongdi guanxi li jinsen fuxiang duidai.", + "metadata": { + "languages": [ + "ind", + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "4c314188c2ec950b39129175c7024070", + "text": "(Minjiang, written) Renren sen er ziyou, zai zunlian ho quanli sang yelue pinden. Tamen fuyou lixin ho liangxin, bin yin yi xiongdi guanxi li jinsen xiang duidai.", + "metadata": { + "languages": [ + "ind", + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "b8ef759a5a22d6dfa9214df3680e8056", + "text": "(Muzzi) Nia ngir ngir ym mexker bbumlix zifyiyr, zunyanr gix jjuanlif alyf bbumlix zzifsof wur. 
Garxier lixxinf gix xierfux cor yif, xiongxdif guanxif wur jinsenr la lo rim hix.", + "metadata": { + "languages": [ + "eng", + "som", + "cat" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "dfabb35b82a82e16d7cb50d4de138e6f", + "text": "(Yeonbyeon) \uc0ac\ub78c\ub4e4\uc774 \uc774 \uc138\uacc4\ub85c \uc624\ub2e4\uac00 \ubaa8\ub450 \uc790\uc720\ud558\uace0, \uc874\uc5c4\uacfc \uad8c\ub9ac\uc774 \ud3c9\ub3d9\uc73c\ub85c \uc788\ub294\ub2e4, \uadf8\ub4e4 \ub9ac\uc131\uacfc \uc591\uc2ec\uc774 \uc788\ub208\uace0, \ud615\uc81c\uc758 \uc815\uc2e0\uc73c\ub85c \uc0c1\ud638\ub85c \uce58\ub8cc\ud558 \uc18c.", + "metadata": { + "languages": [ + "kor" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "1f41b7646ca8aebc36e8f5ec392481fb", + "text": "Abkhaz \u0414\u0430\u0440\u0431\u0430\u043d\u0437\u0430\u0430\u043b\u0430\u043a \u0430\u0443\u0430\u04a9\u044b \u0434\u0448\u043e\u0443\u043f \u0438\u0445\u044b \u0434\u0430\u049b\u04d9\u0438\u04ad\u043d\u044b. \u0410\u0443\u0430\u0430 \u0437\u0435\u0433\u044c \u0437\u0438\u043d\u043b\u0435\u0438 \u043f\u0430\u0442\u0443\u043b\u0435\u0438 \u0435\u0438\u049f\u0430\u0440\u043e\u0443\u043f. 
\u0423\u0440\u04ad \u0438\u0440\u044b\u043c\u043e\u0443\u043f \u0430\u0445\u0448\u044b\u04a9\u0438 \u0430\u043b\u0430\u043c\u044b\u0441\u0438, \u0434\u0430\u0440\u0430 \u0434\u0430\u0440\u0430\u0433\u044c \u0430\u0435\u0448\u044c\u0435\u0438 \u0430\u0435\u0448\u044c\u0435\u0438 \u0440\u0435\u0438\u04a7\u0448 \u0435\u0438\u0437\u044b\u049f\u0430\u0437\u0430\u0440\u043e\u0443\u043f.", + "metadata": { + "languages": [ + "rus" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "9d266a86305385214455dc0f297330bf", + "text": "Aceh Bandum manusia lahee bebah merdeka deungon hak ngon martabat nyang sama. Ngon akai taseumikee, ngon atee tameurasa bandum geutanyoe lagee syedara.", + "metadata": { + "languages": [ + "ind" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "8754c8c0fc9122e6c416f7e32418a258", + "text": "Achuar-Shiwiar Aints ainauti mash metek nuwanmaya akiinawaitji. Turasha angkan pengker pujusmi tusar akiinawaitji. Aintstikia mash ji nintijai paan nintimratnuitji, turasha pengker aa nu nekaatnuitji. Turasha pase aa nusha nekaatnuitji. 
Turasha ji pataachiri ainaujai pengker nintimtunisar pujaj ina nunisrik chikich ainauj aisha pengker nintimtunisar pujustinuitji.", + "metadata": { + "languages": [ + "swa", + "ind" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "UncategorizedText", + "element_id": "f741e79073265d5678150ca96cd7732c", + "text": "Achuar-Shiwiar (1) Penker inintimsamka mash aintsti ankan, metekrin nuya nii penkerin takakui nii akiniamunmaya tu ausamti arantukmau atinuitji mai metekrak.", + "metadata": { + "languages": [ + "ind" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "7691e5f9dd37d6bc38044534196c1e9f", + "text": "Adyghe \u0426\u04cf\u044b\u0444 \u043f\u0441\u0442\u044d\u0443\u0440\u0438 \u0448\u044a\u0445\u044c\u044d\u0444\u0438\u0442\u044d\u0443, \u044f\u043b\u044a\u044b\u0442\u044d\u043d\u044b\u0433\u044a\u044d\u0440\u044d \u044f\u0444\u044d\u0448\u044a\u0443\u0430\u0448\u044d\u0445\u044d\u043c\u0440\u044d\u043a\u04cf\u044d \u0437\u044d\u0444\u044d\u0434\u044d\u0443 \u043a\u044a\u0430\u043b\u044a\u0444\u044b. 
\u0410\u043a\u044a\u044b\u043b\u0440\u044d \u0437\u044d\u0445\u044d\u0448\u04cf\u044b\u043a\u04cf \u0433\u044a\u0443\u0430\u0437\u044d\u0440\u044d \u044f\u04cf\u044d\u0448\u044a\u044b, \u0437\u044b\u0440 \u0437\u044b\u043c \u0437\u044d\u043a\u044a\u043e\u0448 \u0437\u044d\u0445\u0430\u0448\u0406\u044d \u0430\u0437\u0444\u0430\u0433\u0443 \u0434\u044d\u043b\u044a\u044d\u0443 \u0437\u044d\u0444\u044b\u0449\u044b\u0442\u044b\u043d\u0445\u044d \u0444\u0430\u0435.", + "metadata": { + "languages": [ + "rus" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "5473f0bb6f6cf5e0566fcddbbe1209c6", + "text": "Afar Karaamat kee garwa wagittaamal seehada inkih gide akkuk, currik taabuke. Usun kas kee cissi loonuuh, keenik mariiy mara lih toobokinni kasat gexsitam faxximta.", + "metadata": { + "languages": [ + "est", + "som", + "ind" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "25b4641fe79e0196b278b85c923b6502", + "text": "Afrikaans Alle menslike wesens word vry, met gelyke waardigheid en regte, gebore. 
Hulle het rede en gewete en behoort in die gees van broederskap teenoor mekaar op te tree.", + "metadata": { + "languages": [ + "afr" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "ec78c0842e210f3ff6e147ad4fd7a4ec", + "text": "Aguaruna Ashi aents aidauk agkan akinui, betek eme anentsa aentsmasa diyam atanmash, tuja aents anentaibau, aents dutikatasa wakej amu yupichu dutimainnum, tuja ni wakejamun takakush tikish bakushminnum, nuniak tikish aidaujaish shiig yatsuta anmamut ati tusa.", + "metadata": { + "languages": [ + "ind", + "swa", + "est" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "20509f92f090bb4ecf694ea5b01d0921", + "text": "Aja Agbet\u0254wo ple\u014bu van\u0254 gb\u025bm\u025b ko vovo\u0256eka gbesw\u025bgbesw\u025b, s\u0254to am\u025bnyinyi ko ac\u025bwo gom\u025b; wo x\u0254n\u0254 susunywin ko jim\u025bnywi so esexwe. Wo \u0256o a w\u025b n\u0254vi \u0256a\u0256a wowo n\u0254n\u0254wo gb\u0254.", + "metadata": { + "languages": [ + "afr", + "pol", + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "f6e32446c48b0755dfcf243a8142d613", + "text": "Albanian, Tosk T\u00eb gjith\u00eb njer\u00ebzit lindin t\u00eb lir\u00eb dhe t\u00eb barabart\u00eb n\u00eb dinjitet dhe n\u00eb t\u00eb drejta. 
Ata kan\u00eb arsye dhe nd\u00ebrgjegje dhe duhet t\u00eb sillen ndaj nj\u00ebri tjetrit me frym\u00eb v\u00ebllaz\u00ebrimi.", + "metadata": { + "languages": [ + "sqi" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "9a69378bfb3e4825a781de59826eff73", + "text": "Alemannisch (Elsassisch) \u00c0lli Mensche k\u00f9mme m\u00ect de gliche W\u00ecrde \u00f9n Rachte \u00f9ff d\u2019Walt. Sie h\u00e0n \u00e0lli Vern\u00f9nft \u00f9n Gew\u00ecsse \u00f9n selle m\u00ect Br\u00ecederlichkeit de \u00e0ndere gejjen\u00ecwwer h\u00e0ndle.", + "metadata": { + "languages": [ + "deu" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "d5de29db1ca19f8ac33afb7049462513", + "text": "Altai, Southern \u041e\u043d\u0447\u043e \u0443\u043b\u0443\u0441 \u0430\u043a\u2010\u0458\u0430\u0440\u044b\u043a\u043a\u0430 \u0458\u0430\u0439\u044b\u043c \u043b\u0430 \u0442\u0435\u04a5\u2010\u0442\u0430\u0439 \u0442\u0430\u043f\u2010\u044d\u0440\u0438\u043a\u0442\u04f1 \u0442\u0443\u0443\u043b\u0430\u0442. 
\u041e\u043b\u043e\u0440 \u0441\u0430\u043d\u0430\u0430\u0443\u043a\u0430\u0430\u043b\u0443 \u043b\u0430 \u0447\u0435\u043a \u043a\u04f1\u04f1\u043d\u2010\u0442\u0430\u043f\u0442\u0443 \u0431\u043e\u043b\u0443\u043f \u0431\u04f1\u0442\u043a\u0435\u043d \u043b\u0435 \u0431\u043e\u0439\u2010\u0431\u043e\u0439\u044b\u043d \u043a\u0430\u0440\u044b\u043d\u0434\u0430\u0448 \u043a\u0438\u0440\u0435\u0437\u0438 \u043a\u04e7\u0440\u04e7\u0440 \u043b\u04e7 \u0458\u04f1\u0440\u0435\u0440 \u0443\u0447\u0443\u0440\u043b\u0443.", + "metadata": { + "languages": [ + "rus", + "mkd" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "f878523840c938997e3bbd0cd1f38c38", + "text": "Amahuaca Tzovan jato jumahaito hinaayamanonmun vacunoxcanquihnucanpu. Tzovan jato zinaayamanonmun vacunoxcanquihnucanpu. Jonitzan derechocavizyahtoxrivimun vacunoxcanquihqui. Quiyoovinin shinanquin hiromaquin jaucuzahavorahquiqui shinantimunhcanquihqui. Vacurazixquicavizhi quiyoovinixjatimunhcanquihnucanpu.", + "metadata": { + "languages": [ + "cat", + "spa", + "hrv" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "d0963c28613cf0e49ccc8378af7f29b7", + "text": "Amarakaeri Aya'da aratbut katepi' eka'ta' on'pakpo ka'dik o\u0331'ne. Nog aratbut huadak o\u0331'nepo ko\u0331nigti opudo\u0331mey huadak mo'e\u0331. 
Aya'da huadak eka' nopoe\u0331'dik o\u0331'ne kenpa'ti dakhuea' eka' nopoe\u0331'dik o\u0331'ne kenpa'ti ko\u0331nig huama'buytaj o 'tihuapokika' ko\u0331nigti nogo\u0331meytaj tihuapokika 'dik o\u0331'ne.", + "metadata": { + "languages": [ + "ind" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "a0647ca94e22e1ec055ae4ae29467e8c", + "text": "Amharic", + "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "8c8d0d9098a83b293045f03fbe07358d", + "text": "\u12e8\u1230\u12cd\u1361\u120d\u1305\u1361\u1201\u1209\u1361\u1232\u12c8\u1208\u12f5\u1361\u1290\u133b\u1293\u1361\u1260\u12ad\u1265\u122d\u1293\u1361\u1260\u1218\u1265\u1275\u121d\u1361\u12a5\u12a9\u120d\u1290\u1275\u1361\u12eb\u1208\u12cd\u1361\u1290\u12cd\u1362\u1361\u12e8\u1270\u1348\u1325\u122e\u1361\u121b\u1235\u1270\u12cb\u120d\u1293\u1361\u1215\u120a\u1293\u1361\u1235\u120b\u1208\u12cd\u1361\u12a0\u1295\u12f1\u1361\u120c\u120b\u12cd\u1295\u1361\u1260\u12c8\u1295\u12f5\u121b\u121b\u127d\u1290\u1275\u1361\u1218\u1295\u1348\u1235\u1361\u1218\u1218\u120d\u12a8\u1275\u1361\u12ed\u1308\u1263\u12cb\u120d\u1362", + "metadata": { + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "b73c2ef2ae45307ba7290eb553e346c0", + "text": "Amis Chiyu mahufuchay tu tamlaw, maemin 
pingdeng ichunyan a kngli. Iraay chaira lishing a naay a naay a harateng, pimaulahsha u harateng nu kaka shafa.", + "metadata": { + "languages": [ + "tgl", + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "ca25bafc08b1e2c757647d046263bceb", + "text": "Arabela Pueyano pa quishacari, puetunu pajaniyajanaa mariyata miishiya maninia, maja sooshiya tamonu. Puetunu pueyajanaari niishitiajaraca, jiuujiaaracanio pueyacua pa taraajenura. Naarate maninia pa jiyanootioore juhua pa tapueyocuaca.", + "metadata": { + "languages": [ + "swa", + "ind", + "som" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "e1a81a0e10a38df3526fc4432de66ad3", + "text": "Arabic, Standard \u064a\u0648\u0644\u062f \u062c\u0645\u064a\u0639 \u0627\u0644\u0646\u0627\u0633 \u0623\u062d\u0631\u0627\u0631\u064b\u0627 \u0645\u062a\u0633\u0627\u0648\u064a\u0646 \u0641\u064a \u0627\u0644\u0643\u0631\u0627\u0645\u0629 \u0648\u0627\u0644\u062d\u0642\u0648\u0642. 
\u0648\u0642\u062f \u0648\u0647\u0628\u0648\u0627 \u0639\u0642\u0644\u0627\u064b \u0648\u0636\u0645\u064a\u0631\u064b\u0627 \u0648\u0639\u0644\u064a\u0647\u0645 \u0623\u0646 \u064a\u0639\u0627\u0645\u0644 \u0628\u0639\u0636\u0647\u0645 \u0628\u0639\u0636\u064b\u0627 \u0628\u0631\u0648\u062d \u0627\u0644\u0625\u062e\u0627\u0621.", + "metadata": { + "languages": [ + "ara" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "UncategorizedText", + "element_id": "72d099b2761f12d204f35cc85600f8dd", + "text": "Armenian \u0532\u0578\u056c\u0578\u0580 \u0574\u0561\u0580\u0564\u056b\u056f \u056e\u0576\u057e\u0578\u0582\u0574 \u0565\u0576 \u0561\u0566\u0561\u057f \u0578\u0582 \u0570\u0561\u057e\u0561\u057d\u0561\u0580 \u056b\u0580\u0565\u0576\u0581 \u0561\u0580\u056a\u0561\u0576\u0561\u057a\u0561\u057f\u057e\u0578\u0582\u0569\u0575\u0561\u0574\u0562 \u0578\u0582 \u056b\u0580\u0561\u057e\u0578\u0582\u0576\u0584\u0576\u0565\u0580\u0578\u057e\u0589 \u0546\u0580\u0561\u0576\u0584 \u0578\u0582\u0576\u0565\u0576 \u0562\u0561\u0576\u0561\u056f\u0561\u0576\u0578\u0582\u0569\u0575\u0578\u0582\u0576 \u0578\u0582 \u056d\u056b\u0572\u0573 \u0587 \u0574\u056b\u0574\u0575\u0561\u0576\u0581 \u057a\u0565\u057f\u0584 \u0567 \u0565\u0572\u0562\u0561\u0575\u0580\u0561\u0562\u0561\u0580 \u057e\u0565\u0580\u0561\u0562\u0565\u0580\u057e\u0565\u0576\u0589", + "metadata": { + "languages": [ + "est" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "38291b67d0eaef665797206e43651164", + "text": "Aromanian Tuti iats\u00e2li umineshts\u00e2 
s-fac liberi shi egali la n\u00e2muzea shi-ndrepturli. Eali suntu h\u00e2rziti cu fichiri shi sinidisi shi lipseashti un cu alantu sh-si poart\u00e2 tu duhlu-a fr\u00e2ts\u00e2ljiljei.", + "metadata": { + "languages": [ + "ron", + "sqi" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "6bb51b6b82df3d4800c98e8415754489", + "text": "Ash\u00e1ninka Aquempetavacaajeita maaroni atiri. Timatsi aquenqueshirejeitantari maaroni, timatsi amejeitari, ayojeiti paitarica ocameetsati antajeitiri: te oncameetsateji intsaneapitsajeiteero itsipapee. Te oncameetsateji imperanajeitee, te oncameetsateji iroashinoncaajeitee, irointi ocameetsati aacameetsatavacaajeitea.", + "metadata": { + "languages": [ + "fin", + "est", + "cat" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "ef818e559e5b9629b3da213d71f6d693", + "text": "Ash\u00e9ninka, Pichis Maaroni atziripayeeni, ovaquera intzimapaaque, eero ocantzi i\u00f1aashitacaavaitaityaari iromperanataityaari. Eejatzi oquemitari iro\u00f1aaca te apantyaaro amanitashireteri atziri ancanteri: \"Te pirjiperote eeroca, iriima irinta iriitaque \u00f1aaperori\". Eejatzi oquemitari te oncameethate intzime aparoni atziri antayetashityaarone caari ishinetaacairi pashine irantero. 
Tema maaroni ayotziro ampampithashirvaayeta, ayotziro tsicarica otzimayetzi cameethatatsiri anteri o tsicarica otzimi caariperotatsiri, irootaque ocovaperotantari iro\u00f1aaca entacotavacaayetya anquemitacaantanaquero arentzitavacaatyeeyaami ocaaquiini.", + "metadata": { + "languages": [ + "ita", + "cat", + "fin" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "5cb0bb4fdc15b35295973bd4a2247bd1", + "text": "Assyrian Neo-Aramaic \u071f\u0720 \u0712\u072a\u0722\u072b\u0710 \u0712\u072a\u071d\u0720\u0717 \u071a\u0710\u072a\u0710 \u0718\u0712\u072a\u0712\u072a \u0713\u0718 \u0710\u071d\u0729\u072a\u0710 \u0718\u0719\u0715\u0729\u0710. \u0718\u0726\u071d\u072b\u071d\u0720\u0717 \u071d\u0717\u0712\u0710 \u0717\u0718\u0722\u0710 \u0718\u0710\u0722\u071d\u072c. 
\u0712\u0718\u0715 \u0715\u0710\u0717\u0710 \u0713\u072b\u0729\u072c\u071d \u0725\u0720 \u0710\u071a\u072a\u0722\u0710 \u0713\u072a\u0713 \u0717\u0718\u071d\u0710 \u0712\u071a\u0715 \u072a\u0718\u071a\u0710 \u0715\u0710\u071a\u0722\u0718\u072c\u0710.", + "metadata": { + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "fc37a0c903b4ad45223fa0a367de3b9b", + "text": "Asturian Tolos seres humanos nacen llibres y iguales en dignid\u00e1 y drechos y, pola mor de la raz\u00f3n y la conciencia de so, han comportase hermaniblemente los unos colos otros.", + "metadata": { + "languages": [ + "spa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "beb1f5ccad6046d907217e2c355a3d20", + "text": "Awa", + "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "545202363638daaa690a4f5b184ad0da", + "text": "Cuaiquer", + "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "UncategorizedText", + "element_id": "4e13c433d775a93f0bb6c40cbb2d5a03", + "text": "Aymara, Central Taqpach 
jaqejh khuskat u\u00f1jatat\u00e4pjhewa muna\u00f1apansa, lura\u00f1apansa, amuyasi\u00f1apansa, ukatwa jilani sullkan\u00edpjhaspas ukham u\u00f1jasipjha\u00f1apawa.", + "metadata": { + "languages": [ + "swa", + "ind", + "tgl" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "8afc3caab3e458628b6f2efdb46fc6d1", + "text": "Azerbaijani, North (Cyrillic) \u0411\u04af\u0442\u04af\u043d \u0438\u043d\u0441\u0430\u043d\u043b\u0430\u0440 \u043b\u04d9\u0458\u0430\u0433\u04d9\u0442 \u0432\u04d9 \u04bb\u04af\u0433\u0443\u0433\u043b\u0430\u0440\u044b\u043d\u0430 \u049d\u04e9\u0440\u04d9 \u0430\u0437\u0430\u0434 \u0432\u04d9 \u0431\u04d9\u0440\u0430\u0431\u04d9\u0440 \u0434\u043e\u0493\u0443\u043b\u0443\u0440\u043b\u0430\u0440. \u041e\u043d\u043b\u0430\u0440\u044b\u043d \u0448\u04af\u0443\u0440\u043b\u0430\u0440\u044b \u0432\u04d9 \u0432\u0438\u04b9\u0434\u0430\u043d\u043b\u0430\u0440\u044b \u0432\u0430\u0440 \u0432\u04d9 \u0431\u0438\u0440-\u0431\u0438\u0440\u043b\u04d9\u0440\u0438\u043d\u04d9 \u043c\u04af\u043d\u0430\u0441\u0438\u0431\u04d9\u0442\u0434\u04d9 \u0433\u0430\u0440\u0434\u0430\u0448\u043b\u044b\u0433 \u0440\u0443\u04bb\u0443\u043d\u0434\u0430 \u0434\u0430\u0432\u0440\u0430\u043d\u043c\u0430\u043b\u044b\u0434\u044b\u0440\u043b\u0430\u0440.", + "metadata": { + "languages": [ + "rus", + "mkd" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "6d9f8766b1812e209f1a59654443299c", + "text": "Azerbaijani, North (Latin) B\u00fct\u00fcn insanlar l\u0259yaq\u0259t v\u0259 
h\u00fcquqlar\u0131na g\u00f6r\u0259 azad v\u0259 b\u0259rab\u0259r do\u011fulurlar. Onlar\u0131n \u015f\u00fcurlar\u0131 v\u0259 vicdanlar\u0131 var v\u0259 bir-birl\u0259rin\u0259 m\u00fcnasib\u0259td\u0259 qarda\u015fl\u0131q ruhunda davranmal\u0131d\u0131rlar.", + "metadata": { + "languages": [ + "tur" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "3681d23b771b9cf26263ab194af3430d", + "text": "Baatonum Ba t\u0254mbu kpuro marawa ba tii m\u0254, ba n\u025b, girima ka saria s\u0254\u0254. Ba ra bwisiku, ba dasabu m\u0254, ma n weene ba n waasin\u025b m\u025brobisiru s\u0254\u0254.", + "metadata": { + "languages": [ + "som", + "ind" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "fdd8c85dd832c6fb407357bf176d958a", + "text": "Bali Sami manusane sane nyruwadi wantah merdeka tur maduwe kautamaan lan hak-hak sane pateh. Sami kalugrain papineh lan idep tur mangdane pada masawitra melarapan semangat pakulawargaan.", + "metadata": { + "languages": [ + "ind" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "394114d333ed34e0add89b5e9079d474", + "text": "Bamanankan Hadamaden b\u025b\u025b danmak\u025b\u0272\u025bnen b\u025b bange, danbe ni josira la. 
Hakili ni taasi b\u2019u b\u025b\u025b la, wa u ka kan ka baden\u0272asira de waleya u ni \u0272\u0254g\u0254n c\u025b.", + "metadata": { + "languages": [ + "som", + "ind", + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "31e2922fd7a67918fa2a09744965a970", + "text": "Bamun Pe na\u0302 mve\u0301 gu\u0301 puen nyu\u0308tu po\u0302 te mbe ku\u0301 ghe\u0301t ngam pua ngu\u0301enengu\u0301e mbe te wu\u0302me nsebe pua pa mfe\u0301e\u0301ke\u0302t. Pen a\u0302 ntu\u0301m te mbe ku\u0301 rem ngam pua fabshe ngam, a nshi nji\u0302r\u2019ap ne yi nsha\u0302ne nge\u0301tne nga shap po\u0302 te wupme ponta\u0302.", + "metadata": { + "languages": [ + "sqi" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "c5815bd56d9b0f7114cfa825514698ca", + "text": "Baoul\u00e9 Sran mun be ngba, k\u025b be wu be \u0254, be ngba be s\u025b, f\u0254ndi nun, sran-mmala nun. 
Be si akundanbu, be si su \u0254 fata k\u025b sran mun be tran'n, be tran aniaan nun tranl\u025b.", + "metadata": { + "languages": [ + "ind" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "f937bd218ac832a520fee7be14b4e89c", + "text": "Bari \u014autu li\u014b a yu\u014bwe kana, jojo i to\u010firi ko \u010fekesi ko ti se tokitaki ko \u2018b\u00f6rik ko mul\u00f6k\u00f6tyo lo tolu\u014baseran. Se a \u010foka ko denet na kulya na\u2019but ko narok.", + "metadata": { + "languages": [ + "hrv", + "fin" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "72f3bf8426b189087874ec1b7bd46496", + "text": "Basque Gizon-emakume guztiak aske jaiotzen dira, duintasun eta eskubide berberak dituztela; eta ezaguera eta kontzientzia dutenez gero, elkarren artean senide legez jokatu beharra dute.", + "metadata": { + "languages": [ + "ind", + "spa", + "nld" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "5ce714cfa1def0c0d951bf7bff485500", + "text": "Belarusan \u0423\u0441\u0435 \u043b\u044e\u0434\u0437\u0456 \u043d\u0430\u0440\u0430\u0434\u0436\u0430\u044e\u0446\u0446\u0430 \u0441\u0432\u0430\u0431\u043e\u0434\u043d\u044b\u043c\u0456 \u0456 \u0440\u043e\u045e\u043d\u044b\u043c\u0456 \u045e \u0441\u0432\u0430\u0451\u0439 
\u0433\u043e\u0434\u043d\u0430\u0441\u0446\u0456 \u0456 \u043f\u0440\u0430\u0432\u0430\u0445. \u042f\u043d\u044b \u043d\u0430\u0434\u0437\u0435\u043b\u0435\u043d\u044b \u0440\u043e\u0437\u0443\u043c\u0430\u043c \u0456 \u0441\u0443\u043c\u043b\u0435\u043d\u043d\u0435\u043c \u0456 \u043f\u0430\u0432\u0456\u043d\u043d\u044b \u0441\u0442\u0430\u0432\u0456\u0446\u0446\u0430 \u0430\u0434\u0437\u0456\u043d \u0434\u0430 \u0430\u0434\u043d\u0430\u0433\u043e \u045e \u0434\u0443\u0445\u0443 \u0431\u0440\u0430\u0446\u0442\u0432\u0430.", + "metadata": { + "languages": [ + "ukr", + "rus" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "f1c956f483b1e3df941f43c5bba31674", + "text": "Bemba Abantu bonse bafyalwa abalubuka nokulingana mu mucinshi nensambu. Balikwata amano nokutontonkanya, eico bafwile ukulacita ifintu ku banabo mu mutima wa bwananyina.", + "metadata": { + "languages": [ + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "UncategorizedText", + "element_id": "bb5acaee87121a890d36cb7afd3ad15a", + "text": "Bengali \u09b8\u09ae\u09b8\u09cd\u09a4 \u09ae\u09be\u09a8\u09c1\u09b7 \u09b8\u09cd\u09ac\u09be\u09a7\u09c0\u09a8\u09ad\u09be\u09ac\u09c7 \u09b8\u09ae\u09be\u09a8 \u09ae\u09b0\u09cd\u09af\u09be\u09a6\u09be \u098f\u09ac\u0982 \u0985\u09a7\u09bf\u0995\u09be\u09b0 \u09a8\u09bf\u09af\u09bc\u09c7 \u099c\u09a8\u09cd\u09ae\u0997\u09cd\u09b0\u09b9\u09a3 \u0995\u09b0\u09c7\u0964 \u09a4\u09be\u0981\u09a6\u09c7\u09b0 \u09ac\u09bf\u09ac\u09c7\u0995 \u098f\u09ac\u0982 \u09ac\u09c1\u09a6\u09cd\u09a7\u09bf 
\u0986\u099b\u09c7; \u09b8\u09c1\u09a4\u09b0\u09be\u0982 \u09b8\u0995\u09b2\u09c7\u09b0\u0987 \u098f\u0995\u09c7 \u0985\u09aa\u09b0\u09c7\u09b0 \u09aa\u09cd\u09b0\u09a4\u09bf \u09ad\u09cd\u09b0\u09be\u09a4\u09c3\u09a4\u09cd\u09ac\u09b8\u09c1\u09b2\u09ad \u09ae\u09a8\u09cb\u09ad\u09be\u09ac \u09a8\u09bf\u09df\u09c7 \u0986\u099a\u09b0\u09a3 \u0995\u09b0\u09be \u0989\u099a\u09bf\u09a4\u0964", + "metadata": { + "languages": [ + "ben" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "UncategorizedText", + "element_id": "d5919948b12c6b7e2c5179487170dd51", + "text": "Bhojpuri \u0938\u092c\u0939\u093f \u0932\u094b\u0915\u093e\u0928\u093f \u0906\u091c\u093e\u0926\u0947 \u091c\u092e\u094d\u092e\u0947\u0932\u093e \u0906\u0913\u0930 \u0913\u0916\u093f\u0928\u093f\u092f\u094b \u0915\u0947 \u092c\u0930\u093e\u092c\u0930 \u0938\u092e\u094d\u092e\u093e\u0928 \u0906\u0913\u0930 \u0905\u0927\u093f\u0915\u093e\u0930 \u092a\u094d\u0930\u093e\u092a\u094d\u0924 \u0939\u0935\u0947\u0964 \u0913\u0916\u093f\u0928\u093f\u092f\u094b \u0915\u0947 \u092a\u093e\u0938 \u0938\u092e\u091d-\u092c\u0942\u091d \u0906\u0913\u0930 \u0905\u0902\u0924:\u0915\u0930\u0923 \u0915\u0947 \u0906\u0935\u093e\u091c \u0939\u094b\u0916\u0924\u093e \u0906\u0913\u0930 \u0939\u0941\u0928\u0915\u094b \u0915\u0947 \u0926\u094b\u0938\u0930\u093e \u0915\u0947 \u0938\u093e\u0925 \u092d\u093e\u0908\u091a\u093e\u0930\u093e \u0915\u0947 \u092c\u0947\u0935\u0939\u093e\u0930 \u0915\u0930\u0947 \u0915\u0947 \u0939\u094b\u0916\u0932\u093e\u0964", + "metadata": { + "languages": [ + "hin" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 
33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "4b7a06328d8e5cf300c21e03834148ad", + "text": "Bicolano, Central An gabos na tawo ipinangaking may katalinkasan asin parantay sa dignidad asin derechos. Sinda gabos tinawan nin pag-isip asin conciencia kaya dapat na makipag-iriba sa lambang saro bilang mga magturugang.", + "metadata": { + "languages": [ + "tgl" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "e82a426f8b4696143791e8f79987471d", + "text": "Bislama Evri man mo woman i bon fri mo ikwol long respek mo ol raet. Oli gat risen mo tingting mo oli mas tritim wanwan long olgeta olsem ol brata mo sista.", + "metadata": { + "languages": [ + "tgl", + "eng", + "ita" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "09176e19ded6b0ff879ead0799cc2302", + "text": "Bora P\u00e1meere \u00ed\u00ed\u00f1\u00faj\u0268ri me\u00edjcyame ts\u00e1 m\u00fah\u00f3j\u0268\u0301s\u0268\u0301 pa\u00f1\u00e9 \u0268\u0301cub\u00e1hr\u00e1d\u00fa me\u00edjcy\u00e1\u00edtyur\u00f3ne. 
P\u00e1meere tsahd\u00far\u00e9 im\u00ed me\u00edjcyame mew\u00e1jy\u00fajcats\u00ed\u00f1e m\u00e9p\u0268\u0301\u00e1\u00e1b\u00f3jcats\u00ediy\u00e1 tsaat\u00e9k\u00e9 \u00e9hd\u0268\u0301\u0208\u0301v\u00e1llet\u00fam\u00e9 \u00e9hne m\u00fau m\u00e9pa\u00f1\u00e9t\u00fa\u00e9n\u00e9 nahb\u00e9muma me\u00edjcyadu.", + "metadata": { + "languages": [ + "hun" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "5e3ff47fa6202cd3f10a179ea2b898e3", + "text": "Bosnian (Cyrillic) \u0421\u0432\u0430 \u0459\u0443\u0434\u0441\u043a\u0430 \u0431\u0438\u045b\u0430 \u0440\u0430\u045b\u0430\u0458\u0443 \u0441\u0435 \u0441\u043b\u043e\u0431\u043e\u0434\u043d\u0430 \u0438 \u0458\u0435\u0434\u043d\u0430\u043a\u0430 \u0443 \u0434\u043e\u0441\u0442\u043e\u0458\u0430\u043d\u0441\u0442\u0432\u0443 \u0438 \u043f\u0440\u0430\u0432\u0438\u043c\u0430. 
\u041e\u043d\u0430 \u0441\u0443 \u043e\u0431\u0434\u0430\u0440\u0435\u043d\u0430 \u0440\u0430\u0437\u0443\u043c\u043e\u043c \u0438 \u0441\u0432\u0438\u0458\u0435\u0448\u045b\u0443 \u0438 \u0442\u0440\u0435\u0431\u0430 \u0434\u0430 \u0458\u0435\u0434\u043d\u043e \u043f\u0440\u0435\u043c\u0430 \u0434\u0440\u0443\u0433\u043e\u043c\u0435 \u043f\u043e\u0441\u0442\u0443\u043f\u0430\u0458\u0443 \u0443 \u0434\u0443\u0445\u0443 \u0431\u0440\u0430\u0442\u0441\u0442\u0432\u0430.", + "metadata": { + "languages": [ + "mkd" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "8918cf337af35db75c0b7e3a98572814", + "text": "Bosnian (Latin) Sva ljudska bi\u0107a ra\u0111aju se slobodna i jednaka u dostojanstvu i pravima. Ona su obdarena razumom i svije\u0161\u0107u i treba da jedno prema drugome postupaju u duhu bratstva.", + "metadata": { + "languages": [ + "hrv" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "4f74a58266d23d68a787e2a91434a97d", + "text": "Breton Dieub ha par en o dellezegezh hag o gwirio\u00f9 eo ganet an holl dud. 
Poell ha skiant zo dezho ha dleout a reont beva\u00f1 an eil gant egile en ur spered a genvreudeuriezh.", + "metadata": { + "languages": [ + "nld", + "cym" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "289a54f8ff934baba89545ec92d5b768", + "text": "Bugis Sininna rupa tau ri jajiangngi rilinoe nappunnai manengngi riasengnge alebbireng . Nappunai riasengnge akkaleng, nappunai riasengnge ati marennni na sibole bolena pada sipakatau pada massalasureng.", + "metadata": { + "languages": [ + "ind", + "tgl" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "24a3cf3bd02d17e2f2b065bab51c8e70", + "text": "Bulgarian \u0412\u0441\u0438\u0447\u043a\u0438 \u0445\u043e\u0440\u0430 \u0441\u0435 \u0440\u0430\u0436\u0434\u0430\u0442 \u0441\u0432\u043e\u0431\u043e\u0434\u043d\u0438 \u0438 \u0440\u0430\u0432\u043d\u0438 \u043f\u043e \u0434\u043e\u0441\u0442\u043e\u0439\u043d\u0441\u0442\u0432\u043e \u0438 \u043f\u0440\u0430\u0432\u0430. 
\u0422\u0435 \u0441\u0430 \u043d\u0430\u0434\u0430\u0440\u0435\u043d\u0438 \u0441 \u0440\u0430\u0437\u0443\u043c \u0438 \u0441\u044a\u0432\u0435\u0441\u0442 \u0438 \u0441\u043b\u0435\u0434\u0432\u0430 \u0434\u0430 \u0441\u0435 \u043e\u0442\u043d\u0430\u0441\u044f\u0442 \u043f\u043e\u043c\u0435\u0436\u0434\u0443 \u0441\u0438 \u0432 \u0434\u0443\u0445 \u043d\u0430 \u0431\u0440\u0430\u0442\u0441\u0442\u0432\u043e.", + "metadata": { + "languages": [ + "bul" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "61589cb2ca0346e6af7f49a73b4125b3", + "text": "Bulu Abiali bod bese, tege ai sesala, bene etie dzia a mis memvende y'enyi\u00f1, dzom dzia etu fili nk\u00f3b\u00f3, fili ntsogan, fili mboan. Ve abiali te, mod ose ayem dze ene abe, dze ene mbe\u00f1 asu e mod mbog antoa ai mfi na enyi\u00f1 ewulu mezen mene sosoo.", + "metadata": { + "languages": [ + "ron", + "cat", + "ita", + "hun" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "UncategorizedText", + "element_id": "6dbacafdbc68b6ba0689b2d27b2ede49", + "text": "Burmese \u101c\u1030\u1010\u102d\u102f\u1004\u103a\u1038\u101e\u100a\u103a \u1010\u1030\u100a\u102e \u101c\u103d\u1010\u103a\u101c\u1015\u103a\u101e\u1031\u102c \u1002\u102f\u100f\u103a\u101e\u102d\u1000\u1039\u1001\u102c\u1016\u103c\u1004\u1037\u103a \u101c\u100a\u103a\u1038\u1000\u1031\u102c\u1004\u103a\u1038\u104a \u1010\u1030\u100a\u102e\u101c\u103d\u1010\u103a\u101c\u1015\u103a\u101e\u1031\u102c 
\u1021\u1001\u103d\u1004\u1037\u103a\u1021\u101b\u1031\u1038\u1019\u103b\u102c\u1038\u1016\u103c\u1004\u1037\u103a \u101c\u100a\u103a\u1038\u1000\u1031\u102c\u1004\u103a\u1038\u104a \u1019\u103d\u1031\u1038\u1016\u103d\u102c\u1038\u101c\u102c\u101e\u1030\u1019\u103b\u102c\u1038 \u1016\u103c\u1005\u103a\u101e\u100a\u103a\u104b \u1011\u102d\u102f\u101e\u1030\u1010\u102d\u102f\u1037\u104c \u1015\u102d\u102f\u1004\u103a\u1038\u1001\u103c\u102c\u1038 \u101d\u1031\u1016\u1014\u103a\u1010\u1010\u103a\u101e\u1031\u102c \u1009\u102c\u100f\u103a\u1014\u103e\u1004\u1037\u103a \u1000\u103b\u1004\u1037\u103a\u101d\u1010\u103a \u101e\u102d\u1010\u1010\u103a\u101e\u1031\u102c \u1005\u102d\u1010\u103a\u1010\u102d\u102f\u1037\u101b\u103e\u102d\u1000\u103c\u104d \u1011\u102d\u102f\u101e\u1030\u1010\u102d\u102f\u1037\u101e\u100a\u103a \u1021\u1001\u103b\u1004\u103a\u1038\u1001\u103b\u1004\u103a\u1038 \u1019\u1031\u1010\u1039\u1010\u102c\u1011\u102c\u1038\u104d \u1006\u1000\u103a\u1006\u1036\u1000\u103b\u1004\u1037\u103a\u101e\u102f\u1036\u1038\u101e\u1004\u1037\u103a\u104f\u104b", + "metadata": { + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "7b5c1459fc45a2821c0d05cd98c1996f", + "text": "Bushi \u0181inadamu djabi nitirahinyi an-nafasi, reu bokeu mira\u014ba amin\u2019ni usheu ndreka haki. 
Reu teraka ndreka \u00e3kili ndreka hikima, amin\u2019ni zenyi, reu nikulazimu nisi twera\u014ba nin-fihava\u014ba reu sambi reu.", + "metadata": { + "languages": [ + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "799ae8db337401700ed035d921a073fc", + "text": "Candoshi-Shapra Iy tpotsini ichigoroni kis tamam zadkini, vatam tpotsiniva. Vatam ichigoroni magini tarova; ashiriya chinakaniya. Ashirocha, zovalliatsich tamaparia-ashiros sanpata chinagtsa atiniya.", + "metadata": { + "languages": [ + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "b2fc5040743a2c1361f561dfc9c2e365", + "text": "Caquinte Aquejetavacaajiaca maasano caquinte. Chooca aquenquejantaca maasano, chooca amejigaca, atsajiaque taaca opajitapae ocameetsataque antajiguica. Tee oncameetsateji iromperaperanajicaji, tee oncameetsateji irogashinoncajajiacaji. Jero cameetsatatsica aavacaj aiaquempa.", + "metadata": { + "languages": [ + "cat", + "som" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "296f3e08ce32c544b7ce3922abf32c6c", + "text": "Cashibo-Cacataibo Ui uni cara 'iti ic\u00eb axbi ca b\u00ebtsi unib\u00eb gobiernon\u00ebn isc\u00ebx s\u00ebn\u00ebn it\u00ed ic\u00ebn. 
Ui cara ain tita ain papa 'iaxa quixun sinanquinma ca gobiernon\u00ebn sinanc\u00ebx ax b\u00ebtsib\u00eb s\u00ebn\u00ebn 'ic\u00ebn. Camaxunbi ca sinanti 'unanin. Camaxunbi ca a\u00f1u \u00f1u ati cara as\u00e1bi 'ic\u00ebn, a\u00f1u \u00f1u 'ati cara 'aisama 'ic\u00eb quixun 'unanti 'ic\u00ebn. Usa 'ain ca camaxbi ain xuc\u00ebnb\u00eb 'ic\u00ebsaribiti nuiananti 'ic\u00ebn.", + "metadata": { + "languages": [ + "sqi", + "cat", + "som" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "db88e492d6becfc01c06289965d5b5b4", + "text": "Cashinahua Yudabu dasibi jabiaskadi akin, xinantidubuki. Javen taea jau jaibunamenunbunven.", + "metadata": { + "languages": [ + "ind" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "75c025da4f4c95d2f428dc459b739bef", + "text": "Catalan-Valencian-Balear Tots els \u00e9ssers humans neixen lliures i iguals en dignitat i en drets. S\u00f3n dotats de ra\u00f3 i de consci\u00e8ncia, i han de comportar-se fraternalment els uns amb els altres.", + "metadata": { + "languages": [ + "cat" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "6bc8b32628a3759388373fd8872ce83d", + "text": "Cebuano Ang tanang katawhan gipakatawo nga may kagawasan ug managsama sa kabililhon. 
Sila gigasahan sa salabutan ug tanlag og mag-ilhanay isip managsoon sa usa'g-usa diha sa diwa sa ospiritu.", + "metadata": { + "languages": [ + "tgl" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "346a128271cb055071a9b9d4548d0488", + "text": "Chachi Naaju chachilla bain mu' chachilla bain na kayatu tichiba bulla jutyu naakendya'ba kenu deechu taa na kayamu deju, tsenminya,naaju ju\u00f1u bain ne tsaave ti', uukavinu jutyu naa, tideechu juuchi bain, mubain mubain tsaren dejuve, tsenmin shilli pensangenu pude deju'. mitya, tsenr)1in ura' kendu bain ura' kendyu' bain mide' mitya muba mu bain veta' veta' ura' keewaawaa kenuu dejuve.", + "metadata": { + "languages": [ + "ind", + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "UncategorizedText", + "element_id": "0b1ae7cf56e3557ef9acecc99806172b", + "text": "Chakma \ud804\udd1d\ud804\udd2c\ud804\udd07\ud804\udd34 \ud804\udd1f\ud804\udd1a\ud804\udd2a\ud804\udd0c\ud804\udd34 \ud804\udd1a\ud804\udd28\ud804\udd22\ud804\udd28\ud804\udd1e\ud804\udd28\ud804\udd23\ud804\udd28 \ud804\udd25\ud804\udd27\ud804\udd01 \ud804\udd03\ud804\udd28\ud804\udd0c\ud804\udd34\ud804\udd0e\ud804\udd2e\ud804\udd16\ud804\udd34 \ud804\udd03\ud804\udd33\ud804\udd03 \ud804\udd03\ud804\udd07\ud804\udd34\ud804\udd07\ud804\udd25\ud804\udd01 \ud804\udd1a\ud804\udd28\ud804\udd1a\ud804\udd2c\ud804\udd2d \ud804\udd0e\ud804\udd27\ud804\udd1a\ud804\udd34\ud804\udd1f\ud804\udd1a\ud804\udd34\ud804\udd41 \ud804\udd16\ud804\udd22\ud804\udd22\ud804\udd34 \ud804\udd03\ud804\udd2c\ud804\udd18 
\ud804\udd03\ud804\udd33\ud804\udd03 \ud804\udd1d\ud804\udd2a\ud804\udd16\ud804\udd34\ud804\udd19\ud804\udd28 \ud804\udd03\ud804\udd0a\ud804\udd2c; \ud804\udd25\ud804\udd2c\ud804\udd1a\ud804\udd27\ud804\udd16\ud804\udd33\ud804\udd20\ud804\udd34 \ud804\udd1d\ud804\udd2c\ud804\udd07\ud804\udd34\ud804\udd05\ud804\udd1a\ud804\udd27\ud804\udd22\ud804\udd34 \ud804\udd03\ud804\udd2c\ud804\udd07\ud804\udd34\ud804\udd0e\ud804\udd27\ud804\udd1a\ud804\udd34 \ud804\udd03\ud804\udd22\ud804\udd2c\ud804\udd07\ud804\udd34 \ud804\udd0e\ud804\udd27\ud804\udd1a\ud804\udd27\ud804\udd22\ud804\udd34 \ud804\udd1b\ud804\udd33\ud804\udd22\ud804\udd27\ud804\udd16\ud804\udd28 \ud804\udd09\ud804\udd27\ud804\udd1f\ud804\udd34 \ud804\udd18\ud804\udd2e\ud804\udd23\ud804\udd34 \ud804\udd0c\ud804\udd28\ud804\udd18\ud804\udd33\ud804\udd20\ud804\udd2c \ud804\udd1a\ud804\udd28\ud804\udd1a\ud804\udd2c\ud804\udd2d \ud804\udd0c\ud804\udd27\ud804\udd23\ud804\udd1a \ud804\udd05\ud804\udd2a\ud804\udd0c\ud804\udd28\ud804\udd16\ud804\udd34\ud804\udd41", + "metadata": { + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "72d0a5cc00f21c3695d03aa4624b3452", + "text": "Chamorro Todo taotao siha man mafanago libertao yan pareho gi dignidad yan derecho siha, man manae siha hinaso yan consiencia yan debe de ufatinas contra uno yan otro gi un espiritun chumelo.", + "metadata": { + "languages": [ + "spa", + "ind" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "87e7fb3e75a3a124c8e4bce8573a5dd1", + "text": "Chayahuita Ya'ipi 
piyapinpoa' capini noya ninosorocaso' ya'hu\u00ebrin. Ya'ipinpoa' yonquir\u00ebhua'. Noya nicacaso' nitot\u00ebr\u00ebhua'. Napoaton iyanpoa pochin ninosorocaso' ya 'hu\u00ebrin.", + "metadata": { + "languages": [ + "tgl", + "swa", + "sqi" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "03ea2a4dd341c6cdd4c3ddd814721290", + "text": "Cherokee (cased) \u13c2\uab76\uaba3 \uab70\uab92\u13fc\uabbb \uab74\uab8e\uaba5\uab95\uab72 \uab74\uab8e\uabaa\uaba3\uab84\uaba3 \uab70\uab84 \uab71\uabb7\uab83\uab7d\uab99 \uab8e\uab72 \uab70\uabb2\uab99\uaba9\uaba7 \uab70\uab84 \uab74\uab92\uab82 \uab72\u13fb\uab8e\uabab\uaba7\uab72. \u13be\uab9d\uab79\uab8e\uab93 \uab74\uab85\uab9d\uab7a\uab88\uaba4\uab95\uab79 \uab74\uabb0\uabbf\uab9d\uaba7 \uab95\u13f8\uab85\uabab\uab79 \uab70\uab84 \uab70\uaba3\uab95\uaba6\uabaf\uaba3\uab9d\uaba7 \uab70\uab84 \uab71\uab85\uab9d\uaba7 \uab9f\u13fc\uabbb\uab7d \uab92\uabaa\uab8e\uaba3\uabab\uab8e\uaba5\uab7c\uab79 \uab8e \uaba7\uab8e\uaba3\uab95\uabaf \uab70\uaba3\uab95\uaba9 \uab7c\uaba7.", + "metadata": { + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "09009508dba31db1f130bf24d409614e", + "text": "Cherokee (uppercase) \u13c2\u13a6\u13d3 \u13a0\u13c2\u13f4\u13eb \u13a4\u13be\u13d5\u13c5\u13a2 \u13a4\u13be\u13da\u13d3\u13b4\u13d3 \u13a0\u13b4 \u13a1\u13e7\u13b3\u13ad\u13c9 \u13be\u13a2 \u13a0\u13e2\u13c9\u13d9\u13d7 \u13a0\u13b4 \u13a4\u13c2\u13b2 \u13a2\u13f3\u13be\u13db\u13d7\u13a2. 
\u13be\u13cd\u13a9\u13be\u13c3 \u13a4\u13b5\u13cd\u13aa\u13b8\u13d4\u13c5\u13a9 \u13a4\u13e0\u13ef\u13cd\u13d7 \u13c5\u13f0\u13b5\u13db\u13a9 \u13a0\u13b4 \u13a0\u13d3\u13c5\u13d6\u13df\u13d3\u13cd\u13d7 \u13a0\u13b4 \u13a1\u13b5\u13cd\u13d7 \u13cf\u13f4\u13eb\u13ad \u13c2\u13da\u13be\u13d3\u13db\u13be\u13d5\u13ac\u13a9 \u13be \u13d7\u13be\u13d3\u13c5\u13df \u13a0\u13d3\u13c5\u13d9 \u13ac\u13d7.", + "metadata": { + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "ca845e694f20fb1947def444cd1f59f9", + "text": "Chickasaw Himmaka' nittakookano hattak yokasht toksalicha'nikat ki'yo. Hattak m\u00f3\u0331makat itt\u00edllawwi b\u00edyyi'kacha nanna m\u00f3\u0331maka\u0331 ittibaachaffa'hitok.", + "metadata": { + "languages": [ + "swa", + "fin" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "2106a38ef8d9320b2c785f5e0827b8ff", + "text": "Chin, Falam Mikip in bangrep ii zalen nak le sunlawih nak thawn, bangrep in covo nei in, asuak mi kan si. 
Anmah in hleidan thei nak fim nak le nuncan neih thei nak ruah nak nei ii, pakhat le pakhat duh dawt nak, pawl awk nak nei ding asi.", + "metadata": { + "languages": [ + "ind" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "83538732daca0377dcf87942c7e10b41", + "text": "Chin, Haka Mi vialte hna cu zalong te, ai tluk te le upat tihzah awktlak le thiltikhawhnak tinvo a ngei in a chuak mi kan si dih. Minung cu a chia a tha thleidang khomi ruahnak le theihthiamnak ziaza tha a ngeimi kan si caah pakhat le pakhat dawtnak he i pehtlaihnak le i hawikawmhnak ngeih ding kan si.", + "metadata": { + "languages": [ + "ind", + "tgl" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "2dc80f80340d36e85a551642585e592a", + "text": "Chin, Matu Thlangboeih he rhimomna, vanpitna, yalpona hamhmoel ka tawn thlang la cuun la ng\u2019om u. 
Thlanghing he athae-then paekboe thaina neh yakming thaina moeiboe ka tawn thlang la n\u2019om u dong ah khat neh khat lungvat na neh thloehlan voekhlak u thae ham om.", + "metadata": { + "languages": [ + "tgl", + "ind" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "78bcf1425ec172c6ded201b1814121a0", + "text": "Chin, Tedim Mihingte khempeuh in thupitak leh thuneihna tawh suakta tak leh akibang in kipiang ciat ahi hi. Asia leh apha khentel thei thungaihsutna nei ciat uh ahihman in khat leh khat sanggam unau lungsim tawh kithuah khop ding hi.", + "metadata": { + "languages": [ + "ind" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "66e7bb8d8db209646cecea79ecf23f89", + "text": "Chinantec, Chiltepec Lej\u0268\u0308 ni sou tsa lisia\u0331 ija\u0331a sia ikou' ne kojo\u0331 j\u00ef ne juso\u0331 ne jmo' re ju i s\u0268' jmo' n\u00f6 sala\u0331 ne sasno.", + "metadata": { + "languages": [ + "hrv" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "b29e38dc8292efa10880271bbb145f07", + "text": "Chinantec, Ojitl\u00e1n La juu dsa lu si\u00e4 \u2013Dsa k\u00f6 \u00f1i ba dsa, n\u00eda k\u00f6 ni' ba na lu' dsa e dsa t\u00ef \u00e9 li jnia' ro\u00f6'.", + "metadata": { + "languages": [ + "fin", + "cat" + ], + "filetype": 
"text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "621ff48c1c000fb4631b736f4a775ada", + "text": "Chinese, Gan", + "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "be604439089a8fedd5abdc4d81187599", + "text": "\u4eba\u4eba\u751f\u800c\u81ea\u7531\uff0c\u5728\u5fd7\u5411\u8ddf\u6743\u5229\u4e0a\u4e00\u5f8b\u5e73\u7b49\u3002\u6e20\u4eec\u8d4b\u6709\u7406\u6027\u8ddf\u826f\u5fc3\uff0c\u5e76\u7406\u5f53\u4ee5\u5f1f\u5144\u4e49\u6c14\u76f8\u5bf9\u5f85\u3002", + "metadata": { + "languages": [ + "zho" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "2c7b5f9c25a7c50b8ff18bf830c23d94", + "text": "Chinese, Hakka", + "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "05e53430ff030465078e511efc0de0b2", + "text": 
"\u4eba\u4eba\u751f\u800c\u81ea\u7531\uff0c\u5728\u5c0a\u4e25\u540c\u6743\u5229\u4e0a\u4e00\u5f8b\u5e73\u7b49\u3002\u4f62\u4e01\u4eba\u8d4b\u6709\u7406\u6027\u540c\u597d\u5fc3\u7530\uff0c\u5e76\u5e94\u4ee5\u5144\u5f1f\u5173\u7cfb\u4e2a\u7cbe\u795e\u76f8\u5bf9\u5f85\u3002", + "metadata": { + "languages": [ + "zho", + "kor" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "ebfd1fe8c2a3f89dc902d868e38f4ee4", + "text": "Chinese, Jinyu", + "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "549cb1628fe3e0cafb78cd92f08f0554", + "text": "\u4eba\u4eba\u751f\u800c\u81ea\u7531\uff0c\u5728\u5c0a\u4e25\u548c\u6743\u5229\u4e0a\u4e00\u5f8b\u5e73\u7b49\u3002\u4ed6\u4eec\u8d4b\u6709\u7406\u6027\u548c\u826f\u5fc3\uff0c\u5e76\u5e94\u4ee5\u5f1f\u5144\u5173\u7cfb\u7684\u7cbe\u795e\u76f8\u5bf9\u5f85\u3002", + "metadata": { + "languages": [ + "zho", + "kor" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "c49bd62da37860cce7a77fe260bebf2b", + "text": "Chinese, Mandarin (Beijing)", + "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + 
{ + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "bf0df306ed131c2adf4243ded3865e6a", + "text": "\u4eba\u4eba\u751f\u800c\u81ea\u7531\uff0c\u6328\u5c0a\u4e25\u548c\u6743\u5229\u4e0a\u4e00\u522c\u5e73\u7b49\u3002\u4ed6\u4eec\u8d81\u7406\u6027\u548c\u826f\u5fc3\uff0c\u5e76\u5e94\u4ee5\u4e00\u4e2a\u5ea7\u513f\u7684\u7cbe\u795e\u76f8\u5bf9\u5f85\u3002", + "metadata": { + "languages": [ + "zho", + "kor" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "7c55aa8729281fbd5c6fc4c3ab62d9a1", + "text": "Chinese, Mandarin (Guiyang)", + "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "ba1e57780fc9d286c63be7e8e73e3c2e", + "text": "\u4eba\u4eba\u751f\u800c\u81ea\u7531\uff0c\u5728\u5c0a\u4e25\u548c\u6743\u5229\u4e0a\u4e00\u5f8b\u662f\u5e73\u7b49\u7684\u3002\u4ed6\u4eec\u8d4b\u6709\u7406\u6027\u548c\u826f\u5fc3\uff0c\u5e76\u5e94\u4ee5\u5144\u5f1f\u5173\u7cfb\u7684\u7cbe\u795e\u76f8\u4e92\u5bf9\u5f85\u3002", + "metadata": { + "languages": [ + "zho" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "84fcd3db2129561d730906d791a0b309", + "text": "Chinese, Mandarin (Harbin)", + "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": 
"/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "bdf44eafec897495cf404ac895e41ee3", + "text": "\u4eba\u4eba\u751f\u800c\u81ea\u7531\uff0c\u5728\u5c0a\u4e25\u548c\u6743\u5229\u4e4b\u4e0a\u4e00\u5f8b\u5e73\u7b49\u3002\u4ed6\u4eec\u8d4b\u6709\u7406\u6027\u548c\u826f\u5fc3\uff0c\u5e76\u5e94\u4ee5\u54e5\u4eec\u5f1f\u5144\u7684\u7cbe\u795e\u76f8\u5bf9\u5f85\u3002", + "metadata": { + "languages": [ + "zho" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "d03595a53957527885710152809f751a", + "text": "Chinese, Mandarin (Nanjing)", + "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "a96206ba057e6ac6c0fdb4c87d21a1c9", + "text": "\u5927\u5bb6\u751f\u800c\u81ea\u7531\uff0c\u5728\u5c0a\u4e25\u544a\u6743\u5229\u4e0a\u5934\u4e00\u5f8b\u5e73\u7b49\u3002\u4ed6\u4eec\u8d4b\u6709\u7406\u6027\u544a\u826f\u5fc3\uff0c\u5e76\u8be5\u6d3e\u4ee5\u5144\u5f1f\u5173\u7cfb\u7684\u7cbe\u795e\u76f8\u5bf9\u5f85\u3002", + "metadata": { + "languages": [ + "zho", + "kor" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "f25715562a899772b8aed3cfb3962ec1", + "text": "Chinese, Mandarin 
(Simplified)", + "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "c185fc727614ade15888d1e8c9a00c4d", + "text": "\u4eba\u4eba\u751f\u800c\u81ea\u7531,\u5728\u5c0a\u4e25\u548c\u6743\u5229\u4e0a\u4e00\u5f8b\u5e73\u7b49\u3002\u4ed6\u4eec\u8d4b\u6709\u7406\u6027\u548c\u826f\u5fc3,\u5e76\u5e94\u4ee5\u5144\u5f1f\u5173\u7cfb\u7684\u7cbe\u795e\u76f8\u5bf9\u5f85\u3002", + "metadata": { + "languages": [ + "zho", + "kor" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "2d117756d05c842183baab2b6b9ec4be", + "text": "Chinese, Mandarin (Tianjin)", + "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "9e8a7703ae5139a2870b236cfa54cfd6", + "text": "\u4eba\u4e2a\u9876\u4e2a\u751f\u800c\u81ea\u7531\uff0c\u5728\u5c0a\u4e25\u548c\u6743\u5229\u4e0a\u822c\u513f\u822c\u513f\u5927\u3002\u4ed6\u4eec\u8d81\u7406\u6027\u548c\u826f\u5fc3\uff0c\u5e76\u5e94\u4ee5\u5144\u5f1f\u5173\u7cfb\u7684\u7cbe\u795e\u76f8\u5bf9\u5f85\u3002", + "metadata": { + "languages": [ + "zho" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } 
+ ] + } + } + }, + { + "type": "Title", + "element_id": "88b1f3c15657c3079cb323e733d61a60", + "text": "Chinese, Mandarin (Traditional)", + "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "0e1d6539c2001d2ba8e3188f43b83f7f", + "text": "\u4eba\u4eba\u751f\u800c\u81ea\u7531\uff0c\u5728\u5c0a\u56b4\u548c\u6b0a\u5229\u4e0a\u4e00\u5f8b\u5e73\u7b49\u3002\u4ed6\u5011\u8ce6\u6709\u7406\u6027\u548c\u826f\u5fc3\uff0c\u4e26\u61c9\u4ee5\u5144\u5f1f\u95dc\u4fc2\u7684\u7cbe\u795e\u76f8\u5c0d\u5f85\u3002", + "metadata": { + "languages": [ + "kor", + "zho" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "d4e689db8d161e7f3c8d633b36d869ad", + "text": "Chinese, Min Nan", + "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "48659e28c3b04b69caeaa16aded28f58", + "text": "\u4eba\u4eba\u751f\u800c\u81ea\u7531\uff0c\u5728\u5c0a\u4e25\u5408\u6743\u5229\u4e0a\u4e00\u5f8b\u5e73\u7b49\u3002\u56e0\u8d4b\u6709\u813e\u80c3\u5408\u9053\u884c\uff0c\u5e76\u7740\u4ee5\u5144\u5f1f\u5173\u7cfb\u7684\u7cbe\u795e\u76f8\u5bf9\u5f85\u3002", + "metadata": { + "languages": [ + "zho", + "kor" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": 
"/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "7f3fbf32ccbb91a3f12ae80cec59c7cb", + "text": "Chinese, Wu", + "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "c8272c39e78f413c6902b423da92287d", + "text": "\u4eba\u4eba\u751f\u800c\u81ea\u7531\uff0c\u62c9\u5c0a\u4e25\u8131\u4ed4\u6743\u5229\u4e0a\u4e00\u5f8b\u5e73\u7b49\u3002\u4f0a\u62c9\u6709\u7406\u6027\u8131\u4ed4\u826f\u5fc3\uff0c\u5e76\u5e94\u4ee5\u5144\u5f1f\u5173\u7cfb\u4e2a\u7cbe\u795e\u76f8\u5bf9\u5f85\u3002", + "metadata": { + "languages": [ + "zho", + "kor" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "4df4815855b4baec40b68107613e8d88", + "text": "Chinese, Xiang", + "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "7d70d884e74db8b4302ba0589166c634", + "text": "\u4eba\u4eba\u751f\u800c\u81ea\u7531\uff0c\u5728\u5c0a\u4e25\u548c\u6743\u5229\u4e0a\u4e00\u5f8b\u5e73\u7b49\u3002\u4ed6\u4eec\u8d4b\u6709\u7406\u6027\u548c\u826f\u5fc3\uff0c\u5728\u5f97\u4ee5\u5144\u5f1f\u5173\u7cfb\u7684\u7cbe\u795e\u76f8\u5bf9\u5f85\u3002", + "metadata": { + "languages": [ 
+ "zho" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "9a0ba50e87478d37004278ca8d36dd31", + "text": "Chinese, Yue", + "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "932a20508f1be7b3c6fa54b0f9e46f14", + "text": "\u4eba\u4eba\u751f\u800c\u5e73\u7b49\uff0c\u55ba\u5c0a\u4e25\u540c\u57cb\u6743\u5229\u4e0a\u4e00\u5f8b\u5e73\u7b49\u3002\u4f62\u54cb\u6709\u7406\u6027\u540c\u57cb\u826f\u5fc3\uff0c\u800c\u4e14\u5e94\u5f53\u4ee5\u5144\u5f1f\u5173\u7cfb\u5605\u7cbe\u795e\u76f8\u5bf9\u5f85\u3002", + "metadata": { + "languages": [ + "kor", + "zho" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "ce974d0bb96c5c69b851dee0186a878b", + "text": "Chokwe Mwese yoze masemuka katela ukulungunga ulengunga ulemu nyi vumbi eswe ci mwikha. 
Eswe kalingile kupwa nyi usambe nyi mangana nyi kuhasa kulimika nyumwe nyi mukwo nyi kulita nyi mbunge ya ulemu wa utu.", + "metadata": { + "languages": [ + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "f535b754beb32eadd599ba0868093417", + "text": "Chokwe (Angola) Athu eswe kakusemuka ngwe akwo, ku vumbi nyi hakusakula.Kali nyi mana,mba mahasa kulinga umwu hali mukwo nyi espiritu ya kuli kuasa.", + "metadata": { + "languages": [ + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "7ddc431e7188689370bfef9102ab3594", + "text": "Chuukese Esap wor och mettoch epwe appeti aramas seni fansoun ar uputiu non ar tufich me rait. Ar ekiek epwe mecheres o esap pet ren och sakkun mettoch pun ir repwe nonnomfengen non kinamwe o pwipwi annim.", + "metadata": { + "languages": [ + "swe", + "eng" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "93683f443b25a57d05bfb3b2ab1533a8", + "text": "Chuvash \u041f\u0443\u0440 \u0445\u0430\u043b\u04d1\u0445 \u0442\u0430 \u0443\u0439\u0440\u04d1\u043c \u043f\u0443\u0440\u04d1\u043d\u043c\u0430 \u043f\u04d7\u0440 \u0442\u0430\u043d \u043f\u0440\u0430\u0432\u0430\u043b\u043b\u04d1. 
\u04aa\u0430\u043a \u043f\u0440\u0430\u0432\u0430\u043f\u0430 \u0443\u0441\u04d1 \u043a\u0443\u0440\u0441\u0430 \u0432\u04d7\u0441\u0435\u043c \u0445\u04d1\u0439\u0441\u0435\u043d \u043f\u043e\u043b\u0438\u0442\u0438\u043a\u0430 \u0441\u0442\u0430\u0442\u0443\u0441\u043d\u0435 \u0438\u0440\u04d7\u043a\u043b\u04d7\u043d \u0442\u0443\u0441\u0430 \u0445\u0443\u0440\u0430\u04ab\u04ab\u04d7, \u044d\u043a\u043e\u043d\u043e\u043c\u0438\u043a\u0430, \u043e\u0431\u0449\u0435\u0441\u0442\u0432\u043e \u0442\u0430\u0442\u0430 \u043a\u0443\u043b\u044c\u0442\u0443\u0440\u0430 \u0435\u043d\u04d7\u043f\u0435 \u0438\u0440\u04d7\u043a\u043b\u04d7\u043d \u0430\u0442\u0430\u043b\u0430\u043d\u0430\u04ab\u04ab\u04d7. \u041f\u0430\u0442\u0448\u0430\u043b\u04d1\u0445\u0441\u0435\u043d \u04ab\u0430\u043a \u043f\u0440\u0430\u0432\u04d1\u043d\u0430 \u0445\u0438\u0441\u0435\u043f\u043b\u0435\u043c\u0435\u043b\u043b\u0435, \u0442\u0435\u0440\u0440\u0438\u0442\u043e\u0440\u0438 \u043f\u04d7\u0440 \u043f\u04d7\u0442\u04d7\u043c\u043b\u04d7\u0445\u04d7\u043d \u043f\u0440\u0438\u043d\u0446\u0438\u043f\u04d7 \u0443\u043d\u043f\u0430 \u0443\u0441\u04d1 \u043a\u0443\u0440\u043c\u0430 \u043f\u04d7\u0440 \u0435\u043d\u043b\u04d7\u043d \u0447\u0430\u0440\u0441\u0430 \u0442\u04d1\u0440\u0430\u043a\u0430\u043d \u0447\u04d1\u0440\u043c\u0430\u0432 \u043f\u0443\u043b\u043c\u0430\u043b\u043b\u0430 \u043c\u0430\u0440.", + "metadata": { + "languages": [ + "rus", + "bul" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "UncategorizedText", + "element_id": "ec45c354f720412810f1027f2aba4a1b", + "text": "Colorado Piyanle tsa'chila, mankarijun, junshi manta tan, in tobi jaminlajoyoe, titi mi, tenka kano min, junshi, tsa'chila tala, sen jono min.", + "metadata": { + "languages": [ + "fin", + "swa" + 
], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "7829c582fafb0be79ca15885a9ffe253", + "text": "Comorian, Maore Wanadamu piya udzalwa huru tsena sawa ha ufahari na ha haki. Na wawo wana \u00e3kili na hisi, esa ilazimu wadzivhinge na wanyao ha fikira ya unanya.", + "metadata": { + "languages": [ + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "829255cbbb3bc358933cca778550205a", + "text": "Comorian, Ngazidja Wo wanadamu piya wo uzalwa na uhuriya na usawa waki undru na uhaki. Wo upwawa ankili na urambuzi hayizo yilazimu warwaliyane hazitrendwa na fikira zaki unanya.", + "metadata": { + "languages": [ + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "8aea2ff9710269cb8bdfd811de62b8cd", + "text": "Corsican Nascinu tutti l\u2019omi libari \u00e8 pari di dignit\u00e0 \u00e8 di diritti. 
Pussedinu a raghjoni \u00e8 a cuscenza \u00e8 li tocca ad agiscia tr\u00e0 elli di modu fraternu.", + "metadata": { + "languages": [ + "ita" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "UncategorizedText", + "element_id": "7174e554bd11372c5e339ba08b9881ab", + "text": "Cree, Swampy \u14a5\u14ef\u140c \u1403\u14c2\u14c2\u1424 \u144e\u142f\u14c2\u14a5\u144e\u14f1\u140e\u14c2\u1420 \u1401\u1511 \u14c2\u1455\u140e\u146d\u141f \u14c0\u1422\u1455 \u142f\u152d\u147e\u1423 \u146d\u148b \u1403\u1511 \u1472\u14c7\u1417\u1438\u14a5\u146f\u140e\u14ef\u141f \u146d\u1422\u144c\u14c2\u14a5\u144e\u14f1\u140e\u14c2\u1420 \u14c0\u1422\u1455 \u14a5\u14c2\u146f\u140e\u14ef\u140e\u14c7\u166e \u1401 \u1438\u146d\u144e\u14c7\u14aa\u148b\u1420 \u1472\u146b\u1455\u140c\u14c2\u1455\u14a7\u140e\u14c2\u14c2\u1424 \u14c0\u1422\u1455 \u14a5\u1450\u14c0\u14c2\u148b\u1472\u14c2\u14c2\u1424 \u14c0\u1422\u1455 \u140e\u148b\u1474\u14ef\u1450\u140e\u14c2\u1420 \u146d\u148b \u1403\u1511 \u1472\u14c7\u1417\u1438\u14a5\u1450\u148b\u1420\u166e", + "metadata": { + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "952f38639569c0ef489cc6ebb4e809a7", + "text": "Crimean Tatar B\u00fct\u00fcn insanlar serbestlik, menlik ve uquqlarda musaviy ol\u0131p d\u00fcnya\u011fa keleler. 
Olar aq\u0131l ve vicdan saibidirler ve biri-birilerinen qarda\u015f\u00e7as\u0131na munasebette bulunmal\u0131d\u0131rlar", + "metadata": { + "languages": [ + "tur" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "2ed33ba01de24e402f5963e9b2b56328", + "text": "Crioulo, Upper Guinea Tudu pekaduris ta padidu libri i igual na balur suma na diritus. Suma e dadu kapasidadi di pensa, e tene tambi konsiensia, e dibi di trata \u00f1utru suma ermons.", + "metadata": { + "languages": [ + "ind", + "por" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "8eb33fe9d9a2a68e6a146718f7b97d24", + "text": "Crioulo, Upper Guinea (008) Tudu pecadur padidu livre, ninguin ca m\u00e1s ninguin, tudu djusta, tudu tem mesmu diritu. Tudu quin qui padidu, tem si ro\u00e7on, cu si manera di pensa. Na metadi di utrus I d\u00edbidi fassi cussas cu ermondadi.", + "metadata": { + "languages": [ + "ita", + "por", + "ron", + "fra" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "9a87923b32ddc3eb20ab733920e58198", + "text": "Croatian Sva ljudska bi\u0107a ra\u0111aju se slobodna i jednaka u dostojanstvu i pravima. 
Ona su obdarena razumom i svije\u0161\u0107u i treba da jedno prema drugome postupaju u duhu bratstva.", + "metadata": { + "languages": [ + "hrv" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "0666ab63ad7ac65ec7290cb18d27749d", + "text": "Czech V\u0161ichni lid\u00e9 rod\u00ed se svobodn\u00ed a sob\u011b rovn\u00ed co do d\u016fstojnosti a pr\u00e1v. Jsou nad\u00e1ni rozumem a sv\u011bdom\u00edm a maj\u00ed spolu jednat v duchu bratrstv\u00ed.", + "metadata": { + "languages": [ + "ces" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "cb7b177025447a197e5f95166eeb0282", + "text": "Dagaare, Southern Nengsaala zaa ba nang d\u0254ge so la o menga, ka o ne o taaba zaa sengtaa noba emmo ane y\u025bl\u025bsoobo sobic po\u0254. Ba d\u0254g\u025b\u025b ba zaa ne y\u025bng ane y\u025bl\u025b-iruu k'a da seng ka ba er\u025b y\u025bl\u025b kor\u0254 taa a nga y\u0254\u0254mine.", + "metadata": { + "languages": [ + "tgl", + "som" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "8e66c9e0bff4a344e85d8767b43fd67a", + "text": "Dagbani Sal' la sala. B\u025bhig' be sokam sanimi, din pa la amii. Suhiz\u0254bo be sokam sani; ka namb\u0254\u0263u beni. Suhub\u0254hibo mi bi lan k\u0254\u014b yigunaadam kam sani. 
Dinzu\u0263u dimb\u0254\u014b\u0254 zaa wuhiya ka dama di tu kamaata ka ti zaa yu tab' hali ni ti puuni.", + "metadata": { + "languages": [ + "swa", + "som", + "ind" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "b90d9e9d9c05b4f6982b37bbe3c37e9f", + "text": "Dangme Adesahi tsuo \u0254, a b\u0254 m\u025b n\u025b n\u0254 f\u025b\u025b n\u0254 e ye e he, n\u025b n\u0254 tsuaa n\u0254s\u0254 ng\u025b odehe si himi k\u025b he bl\u0254hi a bl\u0254 fa mi. A b\u0254 m\u025b k\u025b n\u0254\u0301 se k\u0254mi k\u025b he nule ju\u025bmi, n\u025b e hia kaa n\u0254 f\u025b\u025b n\u0254 n\u025b e na ny\u025bmi su\u0254mi k\u025b ha n\u0254 tsuaa n\u0254.", + "metadata": { + "languages": [ + "sqi", + "est", + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "334d7844545ea360de232426f24cc228", + "text": "Danish Alle mennesker er f\u00f8dt frie og lige i v\u00e6rdighed og rettigheder. 
De er udstyret med fornuft og samvittighed, og de b\u00f8r handle mod hverandre i en broderskabets \u00e5nd.", + "metadata": { + "languages": [ + "dan" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "12deb838666ab6083a3dba9696b9fba1", + "text": "Dari \u062a\u0645\u0627\u0645 \u0627\u0641\u0631\u0627\u062f \u0628\u0634\u0631 \u0622\u0632\u0627\u062f \u0628\u0647 \u062f\u0646\u06cc\u0627 \u0645\u06cc\u200c\u0622\u06cc\u0646\u062f \u0648 \u0627\u0632 \u0644\u062d\u0627\u0638 \u062d\u06cc\u062b\u06cc\u062a \u0648 \u062d\u0642\u0648\u0642 \u0628\u0627 \u0647\u0645 \u0628\u0631\u0627\u0628\u0631\u0646\u062f. \u0647\u0645\u0647 \u062f\u0627\u0631\u0627\u06cc \u0639\u0642\u0644 \u0648 \u0648\u062c\u062f\u0627\u0646 \u0647\u0633\u062a\u0646\u062f \u0648 \u0628\u0627\u06cc\u062f \u0646\u0633\u0628\u062a \u0628\u0647 \u06cc\u06a9\u062f\u06cc\u06af\u0631 \u0628\u0627 \u0631\u0648\u062d \u0628\u0631\u0627\u062f\u0631\u06cc \u0631\u0641\u062a\u0627\u0631 \u06a9\u0646\u0646\u062f.", + "metadata": { + "languages": [ + "fas" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "3551715d069482f6ec4dba0cd2418882", + "text": "Dendi Aduniya kuna n gu ibuna damayo h\u025bi n\u0254 dei-dei nn daama nna n burucinit\u025br\u025b f\u0254, n lasabu nna laakari ya nam nn m\u0254 huro c\u025br\u025b kuna nyanze t\u025br\u025b b\u0254\u014b\u0254\u0254.", + "metadata": { + "languages": [ + "swa", + "ind", + "som" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": 
"/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "ac128efe598097cdb68a483b1ea1f22c", + "text": "Dinka, Northeastern Raan th\u00f6k eben aye dh\u00eb\u00ebth ka lau nh\u00f6m kua th\u00f6\u014b nhiim eyithiic, kua th\u025b\u0308kic, kua ci y\u00ebknhiethku puou, ku bik c\u00eb\u014b ka ke ye mith etik.", + "metadata": { + "languages": [ + "sqi", + "fin" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "377f3dff94511f4733f9a8fa47685f8a", + "text": "Ditammari Oniti ti p\u025bi n\u0256\u025b om\u0254\u0169 yi kpaatri ot\u0254u, k\u025b y\u025b\u0303 oniti ba we, o yi \u0256o nn\u025b f\u025bh\u0254\u0303f\u025b; o m\u0254k\u025bmu m\u025bcii k\u025bh\u00e3 m\u025by\u025bmm\u025b. 
Ti t\u00fa n\u025b \u0256o kenyari ti t\u0254b\u025b mb\u025b k\u025b yie mii ba nkwu\u0254 ko ot\u0254u \u0256au.", + "metadata": { + "languages": [ + "swa", + "fin", + "ind" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "UncategorizedText", + "element_id": "5dba7623f7ce02476ce5807c3cc7c5f4", + "text": "Drung Avzangf max pyvccuf byv syvnax zyxyyv ef, lifxingx ningx lyangvxinx alf, taixrav angvnikxrav gwanxxix mix syv av duixdaix.", + "metadata": { + "languages": [ + "swe", + "eng" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "7ae6fb6c55acb3c2df3ad8079b9d2b5f", + "text": "Dutch Alle mensen worden vrij en gelijk in waardigheid en rechten geboren. 
Zij zijn begiftigd met verstand en geweten, en behoren zich jegens elkander in een geest van broederschap te gedragen.", + "metadata": { + "languages": [ + "nld" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "58343bf1070d7f16553f03d984ab9241", + "text": "Dzongkha \u0f60\u0f42\u0fb2\u0f7c\u0f0b\u0f56\u0f0b\u0f58\u0f72\u0f0b\u0f5a\u0f74\u0f0b\u0f42\u0f0b\u0f62\u0f0b\u0f51\u0f63\u0f0b\u0f51\u0f56\u0f44\u0f0b\u0f42\u0f72\u0f0b\u0f50\u0f7c\u0f42\u0f0b\u0f63\u0f66\u0f0b\u0f66\u0f90\u0fb1\u0f7a\u0f66\u0f0b\u0f4f\u0f7a\u0f0b\u0f61\u0f7c\u0f51\u0f54\u0f0b\u0f63\u0f66\u0f0b \u0f42\u0f0b\u0f62\u0f0b\u0f63\u0f74\u0f0b\u0f56\u0f62\u0fa9\u0f72\u0f0b\u0f58\u0f50\u0f7c\u0f44\u0f0b\u0f51\u0f44\u0f0b\u0f50\u0f7c\u0f56\u0f0b\u0f51\u0f56\u0f44\u0f0b\u0f60\u0f51\u0fb2\u0f0b\u0f58\u0f49\u0f58\u0f0b\u0f66\u0fa6\u0f7a\u0f0b\u0f61\u0f7c\u0f51\u0f0d \u0f58\u0f72\u0f0b\u0f5a\u0f74\u0f0b\u0f42\u0f0b\u0f62\u0f0b\u0f66\u0fa8\u0fb2\u0f0b\u0f64\u0f7a\u0f66\u0f0b\u0f51\u0f7c\u0f53\u0f0b\u0f42\u0f7c\u0f0b\u0f56\u0f60\u0f72\u0f0b\u0f58\u0f5a\u0f53\u0f0b\u0f49\u0f72\u0f51\u0f0b\u0f51\u0f44\u0f0b\u0f63\u0fa1\u0f53\u0f58\u0f0b\u0f63\u0f66\u0f0b \u0f42\u0f0b\u0f62\u0f0b\u0f42\u0f72\u0f66\u0f0b\u0f63\u0f71\u0f0b\u0f42\u0f0b\u0f45\u0f72\u0f0b\u0f62\u0f0b\u0f60\u0f56\u0f51\u0f0b\u0f62\u0f74\u0f44\u0f0b \u0f42\u0f45\u0f72\u0f42\u0f0b\u0f42\u0f72\u0f66\u0f0b\u0f42\u0f45\u0f72\u0f42\u0f0b\u0f63\u0f74\u0f0b\u0f66\u0fa4\u0f74\u0f53\u0f0b\u0f46\u0f60\u0f72\u0f0b\u0f60\u0f51\u0f74\u0f0b\u0f64\u0f7a\u0f66\u0f0b\u0f56\u0f66\u0f90\u0fb1\u0f7a\u0f51\u0f0b\u0f50\u0f7c\u0f42\u0f0b\u0f63\u0f66\u0f0b\u0f63\u0f71\u0f0b\u0f60\u0f56\u0f51\u0f0b\u0f51\u0f42\u0f7c\u0f0d", + "metadata": { + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": 
"/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "e88cfa3a1dc30bd8c76487377938bb87", + "text": "Edo Emwan ne agbon hia ne a biere, a bie iran noyan-egbe iran kevbe wee, umwon-mwen o ree etin hia ne o kheke iran khin. A ye ewaen kevbe ekhoe ne o maa wu iran, ne iran gha yin da egbe vbe orhion oghe eten-okpa.", + "metadata": { + "languages": [ + "afr" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "4c8da40656a8284287e43685f7f4447c", + "text": "English All human beings are born free and equal in dignity and rights. They are endowed with reason and conscience and should act towards one another in a spirit of brotherhood.", + "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "7d5794631564e8ff8a2bf245087903a4", + "text": "Ese Ejja Ojja\u00f1a esejja ojja\u00f1a oyaja yojjaya cuayani quiapame oyajayojjaya quiapame ojja\u00f1a eseya quiapame quia tai jjashauabataiquiani ecueya epejji jayo jjaya ojja\u00f1a jajji ojja\u00f1ajaassi eseyajayojja.", + "metadata": { + "languages": [ + "swa", + "fin", + "spa", + "hrv" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": 
"NarrativeText", + "element_id": "5f8fd43155bbf931b71069f21ba6a609", + "text": "Esperanto \u0108iuj homoj estas denaske liberaj kaj egalaj la\u016d digno kaj rajtoj. Ili posedas racion kaj konsciencon, kaj devus konduti unu al alia en spirito de frateco.", + "metadata": { + "languages": [ + "slv", + "spa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "e59c6075ee4dbde4faa66c2bdc180029", + "text": "Estonian K\u00f5ik inimesed s\u00fcnnivad vabadena ja v\u00f5rdsetena oma v\u00e4\u00e4rikuselt ja \u00f5igustelt. Neile on antud m\u00f5istus ja s\u00fcdametunnistus ja nende suhtumist \u00fcksteisesse peab kandma vendluse vaim.", + "metadata": { + "languages": [ + "est" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "699838930374f69143263bd99d88883e", + "text": "Even \u0411\u044d\u0439\u0438\u043b \u0431\u043e\u043a\u044d\u0442\u0447\u0443\u0440 \u043e\u043c\u044d\u043d \u0445\u0438\u043b\u043a\u0438\u0447 \u043d\u044f\u043d \u0443\u0440\u0443\u043c\u043a\u044d\u0440 \u0431\u0430\u043b\u0434\u0430\u0440\u0438\u0442\u043d\u043e, \u0442\u0435\u043c\u0438 \u043d\u043e\u04a5\u0430\u0440\u0434\u0443\u043a \u044d\u0433\u0434\u044c\u044d\u043d \u04a5\u0438\u2010\u0434\u0430 \u0430\u0447\u0447\u0430. 
\u0411\u044d\u0439\u0438\u043b \u0431\u04e9\u043a\u044d\u0442\u0447\u0443\u0440 \u043c\u044d\u043d \u0434\u043e\u043b\u0430\u043d \u0430\u043a\u0430\u0433\u0447\u0438\u043c\u0443\u0440 \u0431\u0438\u043d\u043d\u044d\u0442\u044b\u043d.", + "metadata": { + "languages": [ + "rus" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "8164afd787069e69d3a6bed633cfdb21", + "text": "Evenki \u0423\u043f\u043a\u0430\u0442 \u0438\u043b\u044d\u043b \u0442\u044b\u0304\u043d\u043c\u0443\u043a\u0438\u0440\u0434\u0438, \u0443\u0440\u044d\u0304\u043b\u0434\u0438 \u043c\u044d\u0304\u043d\u04a3\u0438 \u0441\u0430\u0304\u0440\u0438\u0447\u0430\u0304\u0434\u0438 \u0431\u0430\u043b\u0434\u044b\u0434\u044f\u0440\u0430. \u041d\u0443\u04a3\u0430\u0440\u0442\u044b\u043d \u0434\u044f\u043b\u0438\u0442\u0432\u0438, \u04bb\u0430\u043b\u0434\u044f\u043d\u0434\u044b\u0432\u0438 \u0431\u0438\u0441\u0438, \u043c\u044d\u043c\u044d\u0433\u0438\u0304\u043b\u0432\u044d\u0440 \u0430\u044f\u0440\u0430\u043b\u0434\u044b\u0304\u0434\u044f\u043d\u0430 \u0442\u044d\u0434\u0435\u0442 \u043e\u0304\u043c\u0430\u043c\u0430\u0447\u0438\u0442\u044b\u043d.", + "metadata": { + "languages": [ + "rus" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "8ba9631d337f32fb2b5a0049718f7162", + "text": "\u00c9w\u00e9 Wodzi amegbet\u0254wo kata\u0303 abl\u0254\u0256eviwoe eye wodzena bubu kple gomekp\u0254kp\u0254 s\u0254s\u0254e. 
Susu kple dzitsinya le wo domet\u0254 \u0256esia\u0256e si eyata wodze be woan\u0254 anyi le \u0256ekaw\u0254w\u0254 blibo me.", + "metadata": { + "languages": [ + "pol" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "4dad8f50be71b880b8d1cd3aa2083177", + "text": "Fante W\u0254wo adasa nyina to fahodzi mu, na h\u0254n nyina y\u025b p\u025br w\u0254 enyimnyam na ndzinoa mu. W\u0254maa h\u0254n nyina adwen na tsibowa, na \u0254w\u0254 d\u025b h\u0254n nkitahodzi mu ndzey\u025b\u025b da no edzi d\u025b w\u0254y\u025b enuanom.", + "metadata": { + "languages": [ + "swa", + "pol" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "f8e68d4590ad494f5d3039e113c1ac46", + "text": "Faroese \u00d8ll menniskju eru f\u00f8dd fr\u00e6ls og j\u00f8vn til vir\u00f0ingar og mannar\u00e6ttindi. 
Tey hava skil og samvitsku og eiga at fara hv\u00f8rt um anna\u00f0 \u00ed br\u00f3\u00f0uranda.", + "metadata": { + "languages": [ + "nor" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "2f3af719eba5f3392f87df0894e56c42", + "text": "Farsi, Western \u062a\u0645\u0627\u0645 \u0627\u0641\u0631\u0627\u062f \u0628\u0634\u0631 \u0622\u0632\u0627\u062f \u0628\u062f\u0646\u06cc\u0627 \u0645\u06cc\u0627\u06cc\u0646\u062f \u0648 \u0627\u0632 \u0644\u062d\u0627\u0638 \u062d\u06cc\u062b\u06cc\u062a \u0648 \u062d\u0642\u0648\u0642 \u0628\u0627 \u0647\u0645 \u0628\u0631\u0627\u0628\u0631\u0646\u062f. \u0647\u0645\u0647 \u062f\u0627\u0631\u0627\u06cc \u0639\u0642\u0644 \u0648 \u0648\u062c\u062f\u0627\u0646 \u0645\u06cc\u0628\u0627\u0634\u0646\u062f \u0648 \u0628\u0627\u06cc\u062f \u0646\u0633\u0628\u062a \u0628\u06cc\u06a9\u062f\u06cc\u06af\u0631 \u0628\u0627 \u0631\u0648\u062d \u0628\u0631\u0627\u062f\u0631\u06cc \u0631\u0641\u062a\u0627\u0631 \u06a9\u0646\u0646\u062f.", + "metadata": { + "languages": [ + "fas" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "c031a4bdd55bf8f8599aaeda8cbe0d56", + "text": "Fijian Era sucu ena galala na tamata yadua, era tautauvata ena nodra dokai kei na nodra dodonu. 
E tiko na nodra vakasama kei na nodra lewaeloma, sa dodonu mera veidokadokai ena yalo ni veitacini.", + "metadata": { + "languages": [ + "hrv", + "ind" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "b70785870cc673f7dcbb24c8464d43fc", + "text": "Finnish Kaikki ihmiset syntyv\u00e4t vapaina ja tasavertaisina arvoltaan ja oikeuksiltaan. Heille on annettu j\u00e4rki ja omatunto, ja heid\u00e4n on toimittava toisiaan kohtaan veljeyden hengess\u00e4.", + "metadata": { + "languages": [ + "fin" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "ecc193afbaf5bf317c868860f5dfc5ec", + "text": "Finnish, Kven Kaikki ihmiset synnyth\u00e4\u00e4n vaphaina, ja heil\u00e4 kaikila oon sama ihmisarvo ja samat ihmisoikkeuet. 
Het oon saanheet j\u00e4rjen ja omatunnon, ja het pieth\u00e4\u00e4n ell\u00e4\u00e4t toinen toisen kans niin ko veljet keskenh\u00e4\u00e4n.", + "metadata": { + "languages": [ + "fin" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "e2a252e076d508cd7e312c25eaf70331", + "text": "Fon Ac\u025b, susu kpo sisi \u0256okpo \u0254 kpo w\u025b gb\u025bt\u0254 bi \u0256o \u0256\u00f2 gb\u025bwiwa t\u0254n hwenu; ye \u0256o linkp\u0254n b\u0254 ayi yet\u0254n m\u025b kpe lo b\u0254 ye \u0256o na do al\u0254 ye\u0256ee \u0256i n\u0254vin\u0254vi \u0256\u0254hun.", + "metadata": { + "languages": [ + "swa", + "tgl" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "d26195c0225bad321fc98f526b1fb27b", + "text": "French Tous les \u00eatres humains naissent libres et \u00e9gaux en dignit\u00e9 et en droits. Ils sont dou\u00e9s de raison et de conscience et doivent agir les uns envers les autres dans un esprit de fraternit\u00e9.", + "metadata": { + "languages": [ + "fra" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "f5ce0eb3d199445ab33436a396fca8cb", + "text": "Frisian, Western Alle minsken wurde frij en gelyk yn weardigens en rjochten berne. 
Hja hawwe ferst\u00e2n en gewisse meikrigen en hearre har foar inoar oer yn in geast fan bruorskip te h\u00e2lden en te dragen.", + "metadata": { + "languages": [ + "nld", + "afr" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "0da991393fa9f40d78c4143c3a25b02a", + "text": "Friulian Ducj i oms a nassin libars e compagns come dignit\u00e2t e derits. A an sintiment e cussience e bisugne che si tratin un culaltri come fradis.", + "metadata": { + "languages": [ + "ita" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "216db5a1011f211d9206a47a9e0e4839", + "text": "Fulfulde, Nigerian Innama aadeeji fof poti, ndim\u0257idi e jibinannde to bannge hakkeeji. E\u0253e ngoodi miijo e hakkilantaagal ete e\u0253e poti huufo ndirde e nder \u0253 iynguyummaagu.", + "metadata": { + "languages": [ + "est", + "dan" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "d245ad5ed3e4ee8727b8152745ffdba6", + "text": "Fulfulde, Nigerian (2) \u0181i-aadama fuu dimo danyete/jibinte o fotan be koomoye e ne\u0257\u0257aaku be hakkeeji. 
\u0253e ndokkaa\u0253e hakkiilo ngaandi nden bo \u0253e kuutindiray hakkunde ma\u0253\u0253e nder yi\u0257yi\u0257\u0257irki mbandiraagu.", + "metadata": { + "languages": [ + "som", + "tur", + "spa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "71e526a7453aa9c044c6f695d1fe4c78", + "text": "Fur kwa-s\u00ed ny\u00e9tti\u014b baajt\u00f3l\u00e1 kereli n\u00e1s nisila na ta\u0331g\u0268d\u0268\u014b arr\u00e1 ka\u0331\u0268\u014b, Na\u014b-s\u00ed ugola na kilma\u014b\u00e1 arr\u00e1 ka\u0331\u0268\u014b nam\u00e1 in l\u00f3\u014b \u00e1l\u00e1\u014b s\u01d4r\u014b\u00e2-s\u00ed k\u00ed jai\u014ba in k\u00e9\u00e9l n\u00e1 s\u01d4r\u014b\u00e2 suur\ua78c\u00ed\u014b b\u00e2r\u014ba.", + "metadata": { + "languages": [ + "hun" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "dfd804850bd4d6daab5db7227283c3ab", + "text": "Ga Af\u0254 gb\u0254m\u0254 f\u025b\u025b gb\u0254m\u0254 y\u025b agbojee mli, k\u025b hegb\u025b ko ni dam\u0254 \u014b\u025bl\u025b koome n\u0254. 
Gb\u0254m\u025bi f\u025b\u025b y\u025b jw\u025b\u014bm\u0254 k\u025b henilee, ni no hew\u0254 l\u025b esa ak\u025b am\u025bhe ahi shi y\u025b ny\u025bmi su\u0254m\u0254 mli.", + "metadata": { + "languages": [ + "swa", + "som" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "38140682ca9cf0b5c7f1cf203b331589", + "text": "Gaelic, Irish Saol\u00e1itear na daoine uile saor agus comhionann ina nd\u00ednit agus ina gcearta. T\u00e1 bauidh an r\u00e9as\u00fain agus an choinsiasa acu agus dl\u00edd iad f\u00e9in d'iompar de mheon bhrthreachais i leith a ch\u00e9ile.", + "metadata": { + "languages": [ + "eng", + "cat" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "c74c5c12c1d20c63c0512bda5ec488ee", + "text": "Gaelic, Scottish Tha gach uile dhuine air a bhreth saor agus co-ionnan ann an urram 's ann an c\u00f2irichean. Tha iad air am breth le reusan is le cogais agus mar sin bu ch\u00f2ir dhaibh a bhith be\u00f2 nam measg fhein ann an spiorad br\u00e0thaireil,", + "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "adb7eafcda17469d6dffe53ac281b9e7", + "text": "Gagauz Insannar hepsi duu\u00earlar serbest hem birtak\u0131m kendi k\u0131ymetind\u00e4 hem haklar\u0131nda. 
Onnara verilmi\u015f ak\u0131l hem \u00fcz da l\u00e4az\u0131m biri-birin\u00e4 davrans\u0131nnar karda\u015fl\u0131k ruhuna uygun.", + "metadata": { + "languages": [ + "tur" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "d838922d035c343059a70e88f83100af", + "text": "Galician T\u00f3dolos seres humanos nacen libres e iguais en dignidade e dereitos e, dotados como est\u00e1n de raz\u00f3n e conciencia, d\u00edbense comportar fraternalmente uns cos outros.", + "metadata": { + "languages": [ + "spa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "505a5c77309f7753fe016e50776647b7", + "text": "Ganda Abantu bazaalibwa nga balina eddembe n'obuyinza ebyenkanankana, batondebwa nga balina amagezi era nga basobola okwawula ekirungi n'ekibi bwebatyo, buli omu agwana okuyisa munne nga muganda we.", + "metadata": { + "languages": [ + "swa", + "tgl", + "ind" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "UncategorizedText", + "element_id": "ec7ace2c582cd24ef64d447f5e1e7a08", + "text": "Garifuna Sun g\u00fcrigia nas\u00edruati yuti lun, lidan \u00faarani, lawiwandun\u00ed lib\u00e1gari kai le aubai lab\u00fasienra, gatu gi\u00f1e lanagun lungua buidu hadan l\u00edbegu.", + "metadata": { + "languages": [ + "ind" + ], + "filetype": "text/plain", + "data_source": { + 
"record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "3db8c991f134adb8e84617cd84e56d43", + "text": "Gen Agbet\u0254wo kpata le jijim\u025ba, \u0256o vosin\u0254n\u0254, nyi gb\u00e8s\u0254\u025b\u0301m\u025b\u0301w\u00f3 le nuj\u0254nunnyi ku go\u0256oejisewo, am\u025bbusewo m\u025b. Tagb\u0254 le woa si, eye w\u0254nawo s\u0254doda woan\u0254n\u0254wo gb\u0254a la nyi n\u0254\u0301visil\u00e9l\u00e9.", + "metadata": { + "languages": [ + "swa", + "pol" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "cb7127a24ce99f60f18c47121fcbe3cb", + "text": "Georgian \u10e7\u10dd\u10d5\u10d4\u10da\u10d8 \u10d0\u10d3\u10d0\u10db\u10d8\u10d0\u10dc\u10d8 \u10d8\u10d1\u10d0\u10d3\u10d4\u10d1\u10d0 \u10d7\u10d0\u10d5\u10d8\u10e1\u10e3\u10e4\u10d0\u10da\u10d8 \u10d3\u10d0 \u10d7\u10d0\u10dc\u10d0\u10e1\u10ec\u10dd\u10e0\u10d8 \u10d7\u10d0\u10d5\u10d8\u10e1\u10d8 \u10e6\u10d8\u10e0\u10e1\u10d4\u10d1\u10d8\u10d7\u10d0 \u10d3\u10d0 \u10e3\u10e4\u10da\u10d4\u10d1\u10d4\u10d1\u10d8\u10d7. 
\u10db\u10d0\u10d7 \u10db\u10d8\u10dc\u10d8\u10ed\u10d4\u10d1\u10e3\u10da\u10d8 \u10d0\u10e5\u10d5\u10d7 \u10d2\u10dd\u10dc\u10d4\u10d1\u10d0 \u10d3\u10d0 \u10e1\u10d8\u10dc\u10d3\u10d8\u10e1\u10d8 \u10d3\u10d0 \u10d4\u10e0\u10d7\u10db\u10d0\u10dc\u10d4\u10d7\u10d8\u10e1 \u10db\u10d8\u10db\u10d0\u10e0\u10d7 \u10e3\u10dc\u10d3\u10d0 \u10d4\u10e5\u10ea\u10d4\u10dd\u10d3\u10dc\u10d4\u10dc \u10eb\u10db\u10dd\u10d1\u10d8\u10e1 \u10e1\u10e3\u10da\u10d8\u10e1\u10d9\u10d5\u10d4\u10d7\u10d4\u10d1\u10d8\u10d7.", + "metadata": { + "languages": [ + "est" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "60e95060440c3ac89b53764c839a9658", + "text": "German, Standard (1901) Alle Menschen sind frei und gleich an W\u00fcrde und Rechten geboren. Sie sind mit Vernunft und Gewissen begabt und sollen einander im Geist der Br\u00fcderlichkeit begegnen.", + "metadata": { + "languages": [ + "deu" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "d9454188531f323f4587d2668a35dce4", + "text": "German, Standard (1996) Alle Menschen sind frei und gleich an W\u00fcrde und Rechten geboren. 
Sie sind mit Vernunft und Gewissen begabt und sollen einander im Geist der Br\u00fcderlichkeit begegnen.", + "metadata": { + "languages": [ + "deu" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "82bf90db0534cabdc2efe2971f9bb4c6", + "text": "Gilyak \u0421\u0438\u043a \u043d\u0438\u0432\u0433\u0443\u043d \u043a\u0443\u0493\u044b\u0442\u04fb\u0430\u0440\u0442\u0430, \u043f\u02bc\u0438\u043d\u0430\u043c\u0430\u0434 \u044f\u0439\u043c\u0442\u0430 \u0430\u0434\u044f\u0439 \u043f\u0440\u0430\u0432\u043e\u0493\u0438\u0440\u030c \u043f\u02bc\u04ca\u0430\u0444\u049b-\u04ca\u0430\u0444\u049b\u0493\u0438\u0440\u030c \u0441\u0430\u043b\u04fb\u0430\u0442\u0430 \u04ff\u0430\u0442 \u043f\u0430\u043d\u0442\u0430\u0434\u0493\u0443\u043d.", + "metadata": { + "languages": [ + "bul", + "rus" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "d61fdd2d22e77149dff43d70d62d722f", + "text": "Gonja Bu kurwe dimedi kik\u025b mobe kumu so, n\u025b mobe, eyilikpa, keshe\u014b n\u025b kashinte\u014b ma\u014b k\u0254r eko pey\u025b to. 
Nyinpela sa dimedi kik\u025b lakal n\u025b mf\u025bra fan\u025b bu chena abarso kelepo so.", + "metadata": { + "languages": [ + "swa", + "ind", + "slv" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "0361867eb371916c85e13fcc3dde7f4b", + "text": "Greek (monotonic) \u038c\u03bb\u03bf\u03b9 \u03bf\u03b9 \u03ac\u03bd\u03b8\u03c1\u03c9\u03c0\u03bf\u03b9 \u03b3\u03b5\u03bd\u03bd\u03b9\u03bf\u03cd\u03bd\u03c4\u03b1\u03b9 \u03b5\u03bb\u03b5\u03cd\u03b8\u03b5\u03c1\u03bf\u03b9 \u03ba\u03b1\u03b9 \u03af\u03c3\u03bf\u03b9 \u03c3\u03c4\u03b7\u03bd \u03b1\u03be\u03b9\u03bf\u03c0\u03c1\u03ad\u03c0\u03b5\u03b9\u03b1 \u03ba\u03b1\u03b9 \u03c4\u03b1 \u03b4\u03b9\u03ba\u03b1\u03b9\u03ce\u03bc\u03b1\u03c4\u03b1. \u0395\u03af\u03bd\u03b1\u03b9 \u03c0\u03c1\u03bf\u03b9\u03ba\u03b9\u03c3\u03bc\u03ad\u03bd\u03bf\u03b9 \u03bc\u03b5 \u03bb\u03bf\u03b3\u03b9\u03ba\u03ae \u03ba\u03b1\u03b9 \u03c3\u03c5\u03bd\u03b5\u03af\u03b4\u03b7\u03c3\u03b7, \u03ba\u03b1\u03b9 \u03bf\u03c6\u03b5\u03af\u03bb\u03bf\u03c5\u03bd \u03bd\u03b1 \u03c3\u03c5\u03bc\u03c0\u03b5\u03c1\u03b9\u03c6\u03ad\u03c1\u03bf\u03bd\u03c4\u03b1\u03b9 \u03bc\u03b5\u03c4\u03b1\u03be\u03cd \u03c4\u03bf\u03c5\u03c2 \u03bc\u03b5 \u03c0\u03bd\u03b5\u03cd\u03bc\u03b1 \u03b1\u03b4\u03b5\u03bb\u03c6\u03bf\u03c3\u03cd\u03bd\u03b7\u03c2.", + "metadata": { + "languages": [ + "ell" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "ef30df67b6cbf4e05af379e61e529561", + "text": "Greek (polytonic) \u1f4d\u03bb\u03bf\u03b9 \u03bf\u1f31 
\u1f04\u03bd\u03b8\u03c1\u03c9\u03c0\u03bf\u03b9 \u03b3\u03b5\u03bd\u03bd\u03b9\u03bf\u1fe6\u03bd\u03c4\u03b1\u03b9 \u1f10\u03bb\u03b5\u1f7b\u03b8\u03b5\u03c1\u03bf\u03b9 \u03ba\u03b1\u1f76 \u1f34\u03c3\u03bf\u03b9 \u03c3\u03c4\u1f74\u03bd \u1f00\u03be\u03b9\u03bf\u03c0\u03c1\u1f73\u03c0\u03b5\u03b9\u03b1 \u03ba\u03b1\u1f76 \u03c4\u1f70 \u03b4\u03b9\u03ba\u03b1\u03b9\u1f7d\u03bc\u03b1\u03c4\u03b1. \u0395\u1f36\u03bd\u03b1\u03b9 \u03c0\u03c1\u03bf\u03b9\u03ba\u03b9\u03c3\u03bc\u1f73\u03bd\u03bf\u03b9 \u03bc\u1f72 \u03bb\u03bf\u03b3\u03b9\u03ba\u1f74 \u03ba\u03b1\u1f76 \u03c3\u03c5\u03bd\u03b5\u1f77\u03b4\u03b7\u03c3\u03b7, \u03ba\u03b1\u1f76 \u1f40\u03c6\u03b5\u1f77\u03bb\u03bf\u03c5\u03bd \u03bd\u1f70 \u03c3\u03c5\u03bc\u03c0\u03b5\u03c1\u03b9\u03c6\u1f73\u03c1\u03bf\u03bd\u03c4\u03b1\u03b9 \u03bc\u03b5\u03c4\u03b1\u03be\u1f7b \u03c4\u03bf\u03c5\u03c2 \u03bc\u1f72 \u03c0\u03bd\u03b5\u1fe6\u03bc\u03b1 \u1f00\u03b4\u03b5\u03bb\u03c6\u03bf\u03c3\u1f7b\u03bd\u03b7\u03c2.", + "metadata": { + "languages": [ + "ell" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "a8aaedf9144ce4af4a672873d93945c2", + "text": "Guaran\u00ed, Paraguayan Mayma yvyp\u00f3ra ou ko yvy \u00e1ri i\u00f1apytl\u02bcyre ha ete\u0129cha dignidad ha derecho jeguerek\u00f3pe; ha ikatu rupi oikuaa a\u00f1et\u00e9va ha a\u00f1ete\u02bcyva, ipor\u00e3va ha iva\u00edva, tekotev\u1ebd pehengu\u00e9icha oiko o\u00f1ondiveku\u00e9ra.", + "metadata": { + "languages": [ + "slk", + "por", + "spa", + "ind" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + 
"type": "NarrativeText", + "element_id": "1a8dccbb2225da58c6c32c944346a88f", + "text": "Guarayu Opakatu ava yoro\u2019a nda\u2019ei tembigwaigwa oyoyatupri, sekotupri, va\u00ebra, imboeteisara, oikatu ipi\u2019a yemo\u00f1eta, imbaekua, ndiyai yurekorairai \u00f1ep\u00ebi p\u00ebi ambua rese.", + "metadata": { + "languages": [ + "ind", + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "2aff799c80d0ba06e344f3b917c6aa5a", + "text": "Gujarati \u0aaa\u0acd\u0ab0\u0aa4\u0abf\u0ab7\u0acd\u0aa0\u0abe \u0a85\u0aa8\u0ac7 \u0a85\u0aa7\u0abf\u0a95\u0abe\u0ab0\u0acb\u0aa8\u0ac0 \u0aa6\u0ac3\u0ab7\u0acd\u0a9f\u0abf\u0a8f \u0ab8\u0ab0\u0acd\u0ab5 \u0aae\u0abe\u0aa8\u0ab5\u0acb \u0a9c\u0aa8\u0acd\u0aae\u0aa5\u0ac0 \u0ab8\u0acd\u0ab5\u0aa4\u0a82\u0aa4\u0acd\u0ab0 \u0a85\u0aa8\u0ac7 \u0ab8\u0aae\u0abe\u0aa8 \u0ab9\u0acb\u0aaf \u0a9b\u0ac7. 
\u0aa4\u0ac7\u0aae\u0aa8\u0abe\u0aae\u0abe\u0a82 \u0ab5\u0abf\u0a9a\u0abe\u0ab0\u0ab6\u0a95\u0acd\u0aa4\u0abf \u0a85\u0aa8\u0ac7 \u0a85\u0a82\u0aa4\u0a83\u0a95\u0ab0\u0aa3 \u0ab9\u0acb\u0aaf \u0a9b\u0ac7 \u0a85\u0aa8\u0ac7 \u0aa4\u0ac7\u0aae\u0aa3\u0ac7 \u0aaa\u0ab0\u0ab8\u0acd\u0aaa\u0ab0 \u0aac\u0a82\u0aa7\u0ac1\u0aa4\u0acd\u0ab5\u0aa8\u0ac0 \u0aad\u0abe\u0ab5\u0aa8\u0abe\u0aa5\u0ac0 \u0ab5\u0ab0\u0acd\u0aa4\u0ab5\u0ac1\u0a82 \u0a9c\u0acb\u0a87\u0a8f.", + "metadata": { + "languages": [ + "guj" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "7c7879f1335e2e8f7c0ca4a80cb6d9fc", + "text": "Gumuz Dub\ua78caga b\ua78caga metaam metaam alamaam kamaanzaak\ua78coma kas\ua78ce bipok\ua78coga kamad\ua78cab maafuc\ua78cak\ua78cwa haaga bac\ua78caga tso. Ka\u0301b\ua78caga jajanda kwa jala etigafalagash ma\ua78ciiya nago metaagwa eyaal yida-eba bic\ua78caga tso.", + "metadata": { + "languages": [ + "som" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "c591dbcd933d69898871c75fc9b2c5b8", + "text": "Haitian Creole French (Kreyol) Tout moun f\u00e8t lib, egal ego pou diyite kou w\u00e8 dwa. 
Nou gen la rezon ak la konsyans epi nou f\u00e8t pou nou aji youn ak lot ak yon lespri fwat\u00e8nite.", + "metadata": { + "languages": [ + "fra" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "1caef318c81d61c240de817182b5b56b", + "text": "Haitian Creole French (Popular) Tout moun sou t\u00e8 a f\u00e8t tou lib. Tout gen menm val\u00e8 (nan je lasosyete), tout moun gen menm dwa devan Lalwa. Tout moun f\u00e8t ak yon bonsans, tout f\u00e8t ak yon konsyans epi youn f\u00e8t pou trete l\u00f2t tankou fr\u00e8 ak s\u00e8.", + "metadata": { + "languages": [ + "fra", + "eng" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "ac23b37d7fc3617b806d164fb38da99e", + "text": "Hani Aqsol liq yoqdeivq yoqpyuq bo, meeqyaovq ssolnei colpyuq qiq kov dei. Davqtavcolssaq neenyuq bel neema meeq ya siq, laongaoq meilnaol nadul meil e gaq ssol hhyul hha bavqduv nia.", + "metadata": { + "languages": [ + "cat", + "som", + "est" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "100bdd3a0bc9a25394f34018b95871fe", + "text": "Hausa Duk \u2018yan\u2019adan ana haihuwarsu ne a matsayin \u2018yantattun \u2018ya\u2019ya, kuma mutuncinsu da haqqoqinsu daidai yake da na kowa. 
Suna da tunani da cikakken hankali, saboda haka ake son duk mu\u2019amalar da za su yi, ta kasance akwai \u2018yan\u2019uwantaka a tsakani.", + "metadata": { + "languages": [ + "ind", + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "19ff46e13339eab9d9fce6566dad6102", + "text": "Hausa (Niger) Su dai \u01b4an\u2010adam, ana haifuwarsu ne duka \u01b4antattu, kuma kowannensu na da mutunci da hakkoki daidai da na kowa. Suna da hankali da tunani, saboda haka duk abin da za su aikata wa juna, ya kamata su yi shi a cikin \u01b4an\u2010uwanci.", + "metadata": { + "languages": [ + "swa", + "ind" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "39fce89f870171ba68c60c4aaaeb5509", + "text": "Hausa (Nigeria) Su dai \u2018yan-adam, ana haifuwarsu ne duka \u2018yantattu, kuma kowannensu na da mutunci da hakkoki daidai da na kowa. 
Suna da hankali da tunani, saboda haka duk abin da za su aikata wa juna, ya kamata su yi shi a cikin \u2018yan-uwanci.", + "metadata": { + "languages": [ + "ind", + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "5a888adab3cc776c69ebb4b588db4bfb", + "text": "Hawaiian H\u0101nau k\u016b\u2019oko\u2019a \u2018ia n\u0101 k\u0101naka apau loa, a ua kau like ka hanohano a me n\u0101 pono k\u012bvila ma luna o k\u0101kou p\u0101kahi. Ua ku\u2019u mai ka no\u2019ono\u2019o pono a me ka \u2018ike pono ma luna o k\u0101kou, no laila, e aloha k\u0101kou kekahi i kekahi.", + "metadata": { + "languages": [ + "swa", + "lav" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "9bce25b61dc4faf00ebf9ae5bedd19aa", + "text": "Hebrew \u05db\u05dc \u05d1\u05e0\u05d9 \u05d0\u05d3\u05dd \u05e0\u05d5\u05dc\u05d3\u05d5 \u05d1\u05e0\u05d9 \u05d7\u05d5\u05e8\u05d9\u05df \u05d5\u05e9\u05d5\u05d5\u05d9\u05dd \u05d1\u05e2\u05e8\u05db\u05dd \u05d5\u05d1\u05d6\u05db\u05d5\u05d9\u05d5\u05ea\u05d9\u05d4\u05dd. 
\u05db\u05d5\u05dc\u05dd \u05d7\u05d5\u05e0\u05e0\u05d5 \u05d1\u05ea\u05d1\u05d5\u05e0\u05d4 \u05d5\u05d1\u05de\u05e6\u05e4\u05d5\u05df, \u05dc\u05e4\u05d9\u05db\u05da \u05d7\u05d5\u05d1\u05d4 \u05e2\u05dc\u05d9\u05d4\u05dd \u05dc\u05e0\u05d4\u05d5\u05d2 \u05d0\u05d9\u05e9 \u05d1\u05e8\u05e2\u05d4\u05d5 \u05d1\u05e8\u05d5\u05d7 \u05e9\u05dc \u05d0\u05d7\u05d5\u05d4.", + "metadata": { + "languages": [ + "heb" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "c47d3d0f8955a5c0769b982de1bcab85", + "text": "Hiligaynon Ang tanan nga tao ginbun-ag nga hilway kag may pag-alalangay sa dungog kag katarungan. Sila ginhatagan sang pagpamat-od kag konsensya kag nagakadapat nga magbinuligay sa kahulugan sang pag-inuturay.", + "metadata": { + "languages": [ + "tgl" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "UncategorizedText", + "element_id": "8af5d2f7586f72942fcfc21e4f9f0e7e", + "text": "Hindi \u0938\u092d\u0940 \u092e\u0928\u0941\u0937\u094d\u092f\u094b\u0902 \u0915\u094b \u0917\u094c\u0930\u0935 \u0914\u0930 \u0905\u0927\u093f\u0915\u093e\u0930\u094b\u0902 \u0915\u0947 \u092e\u093e\u092e\u0932\u0947 \u092e\u0947\u0902 \u091c\u0928\u094d\u092e\u091c\u093e\u0924 \u0938\u094d\u0935\u0924\u0928\u094d\u0924\u094d\u0930\u0924\u093e \u0914\u0930 \u0938\u092e\u093e\u0928\u0924\u093e \u092a\u094d\u0930\u093e\u092a\u094d\u0924 \u0939\u0948 \u0964 \u0909\u0928\u094d\u0939\u0947\u0902 \u092c\u0941\u0926\u094d\u0927\u093f \u0914\u0930 \u0905\u0928\u094d\u0924\u0930\u093e\u0924\u094d\u092e\u093e \u0915\u0940 
\u0926\u0947\u0928 \u092a\u094d\u0930\u093e\u092a\u094d\u0924 \u0939\u0948 \u0914\u0930 \u092a\u0930\u0938\u094d\u092a\u0930 \u0909\u0928\u094d\u0939\u0947\u0902 \u092d\u093e\u0908\u091a\u093e\u0930\u0947 \u0915\u0947 \u092d\u093e\u0935 \u0938\u0947 \u092c\u0930\u094d\u0924\u093e\u0935 \u0915\u0930\u0928\u093e \u091a\u093e\u0939\u093f\u090f \u0964", + "metadata": { + "languages": [ + "hin" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "b992780a7e7cfec805b61d50bd3cbb25", + "text": "Hindustani, Sarnami Sab djanne aadj\u00e1di aur barabar paidaa bhail\u00e8n, iddjat aur hak m\u00ea. Ohi djanne ke lage sab ke samadj-boedj aur hierdaai hai aur doesare se sab soemmat s\u00e8, djaane-maane ke chaahin.", + "metadata": { + "languages": [ + "est", + "ind", + "som" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "d4294c077d745315e3700a34906a1a37", + "text": "Hmong, Northern Qiandong Laix laix diangl dangt lol sob dab yangx ghax maix zit yef, niangb diot gid zenb nieef haib gid quaif lit gid nongd jus diel pinf denx. 
Nenx dol maix laib lix xent haib jox hvib vut, nenx dol nongt liek bed ut id xit deit dait.", + "metadata": { + "languages": [ + "cat", + "nld", + "fra" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "d5c28cb0359d5c0d40ae0645be22c03b", + "text": "Hmong, Southern Qiandong Leb leb nis zib youl nangs, mex ad sheit nangd zend yanl nhangs njanl lib. Mix mex lix xinb gaot liangt send, leb leb lies nhangs ghob nab ghob geud nangd.", + "metadata": { + "languages": [ + "tgl", + "eng" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "3ae5d0023d02b871b6b7567348fbd99b", + "text": "Hmong Njua Cuat lenx cuat dol bongb deul ndax dex douf muax zif youx, nyaob shout zunb yinx tab ndas dos id, dax zis ib suk. 
Nil buab daf lol jaox muax lid xinf hlub hout tab liangx xinb shab nzhuk, yinf gaib keuk suk gud dix mol lol nit jinb shenx lol shib daf shib hlad.", + "metadata": { + "languages": [ + "som", + "ind" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "4113619dd86b7bf65f70dd31f3155ce1", + "text": "Huastec (San Lu\u00eds Potos\u00ed) Patal an inik ani an uxum u wa'tsinal walkadh abal junun\u00fal kin bats'uw an alwa'tal\u00e1b ani ka pidhan in \u00e9y jant'ini' in tomn\u00e1l; in kwa'al in tsalp\u00e1dh ani in k'ay\u00e1' abal kin k'anidha' in juntal.", + "metadata": { + "languages": [ + "ind", + "swa", + "hun" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "cec56f0f701b47b7615015993ec87eaa", + "text": "Huastec (Sierra de Otontepec) Kuentsal nap wah-ch\u00ednal tee ti chabal jayechek-i antip wah-ch\u00ednal, b\u00e1 tam\u00e1 maxak a pulik maxak in exlal, jununul an\u00ed ni chap an\u00ed jaxtam ko-yal kip le-nax\u00edn an\u00ed ki k-ana ti ba.", + "metadata": { + "languages": [ + "ind", + "cat", + "ces" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "6f91b5cb130f52af680e4b1b597c984a", + "text": "Huastec (Veracruz) Ejtal an kw'ajiiltsik u wa'chinal kweteem ani chu'udh k'al an chu'uxtalaab ani yajat ka k'aak'naaxin juun ani 
juun.", + "metadata": { + "languages": [ + "swa", + "est", + "som", + "ind" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "68c1e44b4d3af66e1c5cddb5a8861a91", + "text": "Huitoto, Murui Nana ca\u0268 comuillamona dama ca\u0268 abido it\u0268ca\u0268. Ca\u0268 comuillamona j\u0268a\u0268m\u0268e anamo i\u00f1ed\u0268ca\u0268. Nana daje facaiconi it\u0268ca\u0268. Ab\u0268 ui\u00f1uanona comuid\u0268ca\u0268. Dan\u0268 conin\u0268rie ca\u0268 nabairilla.", + "metadata": { + "languages": [ + "ita", + "por" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "35c2ba2ee3067a7d3d5509a2f11f8123", + "text": "Hungarian Minden. emberi l\u00e9ny szabadon sz\u00fcletik \u00e9s egyenl\u0151 m\u00e9lt\u00f3s\u00e1ga \u00e9s joga van. Az emberek, \u00e9sszel \u00e9s lelkiismerettel b\u00edrv\u00e1n, egym\u00e1ssal szemben testv\u00e9ri szellemben kell hogy viseltessenek.", + "metadata": { + "languages": [ + "hun" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "57d454640d5878f3ce695e2f10449346", + "text": "Ibibio Kpukpuru owo emana nte amanison, enyun enyene ukem ukem uku ye unen. 
Eyoho mmo ye ukeme ndikere nkpo, ndinyun nyene esit, ke ntre, mmo enyene ndiman nkpo mbana kiet eken ke esit ndito eka.", + "metadata": { + "languages": [ + "ind", + "swa", + "tur" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "d1120c74094e3c70d2191f6d40987753", + "text": "Icelandic Hver ma\u00f0ur er borinn frj\u00e1ls og jafn \u00f6\u00f0rum a\u00f0 vir\u00f0ingu og r\u00e9ttindum. Menn eru g\u00e6ddir vitsmunum og samvizku, og ber \u00feeim a\u00f0 breyta br\u00f3\u00f0urlega hverjum vi\u00f0 annan.", + "metadata": { + "languages": [ + "nor" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "7a90d2a44053e814d2d0cdd9e816e459", + "text": "Ido Omna homi naskas libera ed egala relate digneso e yuri. Li es dotita per raciono e koncienco e devas agar vers l'una l'altra en spirito di frateso.", + "metadata": { + "languages": [ + "ita", + "spa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "c061731c2409f1d04154bcb99040df32", + "text": "Idoma \u0118g\u0119 ni modudu ac\u0119 k\u0119c\u0119 nya b\u0119c\u0119 \u0119hehi aa ,hibi \u0119g\u037b ma ac\u0119 duu jonjil\u0119 ipu koc\u0119gba n\u037bc\u0119 c\u0119gba m\u0119ml\u2019ojonjil\u0119 ipu \u037bdah ni yab\u037b \u037bc\u0119 nya. 
Odudu ac\u0119 kwu \u0452wule ml\u2019ohili otu m\u0119ml\u2019ocai k\u0119la j\u037bc\u0119 \u037bha ni yipu \u037btu \u037bc\u0119 aa, higb\u037b ma \u037bc\u0119 higbo y\u037bda m\u0119ml\u2019 \u037bmpa gunu l\u0119 b\u037bin\u0119 nu ma.", + "metadata": { + "languages": [ + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "c3dc3590b2338d3585c67664e25eb878", + "text": "Igbo A m\u1ee5r\u1ee5 mmad\u1ee5 nile n'ohere nakwa nha anya ugwu na ikike. E nyere ha uche na mm\u1ee5\u1ecd ime ihe ziri ezi nke na ha kwesiri \u1ecbkpaso ibe ha agwa n'obi nwanne na nwanne.", + "metadata": { + "languages": [ + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "050a0685e37c5cdf1484af7fb81846c0", + "text": "Ijo, Southeast Kim\u2019 owoumo se, keni bara ki na, pa zimi, ose keni bara kemi. Kim\u2019se ye iroro, mani ikiou nana, enini kim\u2019se dudu tari teme nana weri iyenri.", + "metadata": { + "languages": [ + "swa", + "slv", + "hrv", + "ind" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "ea67730b04f48bb8c5b63709621f2034", + "text": "Ilocano Amin nga tao nga sibibiag ket naiyanak a siwawayawaya ken addaan iti agpapada nga dayaw ken kalintegan. 
Naikkanda ti panagikalintegan ken konsensya a nasken ti panagtitinnulong iti meysa ken meysa iti espiritu nga nainkak-absatan.", + "metadata": { + "languages": [ + "tgl" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "7cec6b86e84db86bb3df3b8e636075e3", + "text": "Indonesian Semua orang dilahirkan merdeka dan mempunyai martabat dan hak-hak yang sama. Mereka dikaruniai akal dan hati nurani dan hendaknya bergaul satu sama lain dalam semangat persaudaraan.", + "metadata": { + "languages": [ + "ind" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "e2a669a4be13da0177954c07c8ca0014", + "text": "Interlingua Tote le esseres human nasce libere e equal in dignitate e in derectos. 
Illes es dotate de ration e de conscientia e debe ager le unes verso le alteres in un spirito de fraternitate.", + "metadata": { + "languages": [ + "ita", + "fra" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "c08152bc9c1cbc1930714b7051e6100a", + "text": "Inuktitut, Eastern Canadian \u1403\u14c5\u152a\u14d5\u14ab\u1466 \u140a\u14c2\u1585\u144e\u1546\u152a\u14d5\u14ab\u1466 \u1403\u14c5\u14da\u1405\u1550\u14aa\u1455 \u1403\u14f1\u14aa\u1550\u14f1\u1550\u15a2\u144e\u1483 \u140a\u14bb\u14aa\u14d7 \u140a\u153e\u1528\u1405\u1583\u144e\u148c\u1483\u15a2\u144e\u1483 \u14c2\u1550\u14f1\u140a\u1591\u14c2\u1483\u146f\u1466 \u140a\u14bb\u14aa\u14d7 \u1431\u152a\u14d0\u14c7\u1403\u144e\u144e\u148d\u1466. \u1403\u14f1\u1583\u1585\u1450\u1581\u144e\u1583\u1550\u144e\u1455\u1405\u1559\u14d5\u1550\u1433\u1466 \u1431\u153e\u152a\u144e\u1583\u1550\u14c2\u1483\u146f\u1466 \u1583\u1455\u1673\u144e\u148c\u1466\u144e\u140a\u1546\u140a\u1583\u1550\u14c2\u1483\u146f\u14ea\u14d7.", + "metadata": { + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "85b8bd3d1031ba2138519e0c94a05535", + "text": "Inuktitut, Greenlandic Inuit tamarmik inunngorput nammineersinnaassuseqarlutik assigiimmillu ataqqinassuseqarlutillu pisinnaatitaaffeqarlutik. 
Solaqassusermik tarnillu nalunngissusianik pilersugaapput, imminnullu iliorfigeqatigiittariaqaraluarput qatanngutigiittut peqatigiinnerup anersaavani.", + "metadata": { + "languages": [ + "est" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "6e8030f949832ac1e4d5632bc1a06b48", + "text": "Italian Tutti gli esseri umani nascono liberi ed eguali in dignit\u00e0 e diritti. Essi sono dotati di ragione e di coscienza e devono agire gli uni verso gli altri in spirito di fratellanza.", + "metadata": { + "languages": [ + "ita" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "ec65722b37347cefd9069c89a8e75791", + "text": "Japanese", + "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "57bbff46bb89b26b933206afe0fd8904", + "text": 
"\u3059\u3079\u3066\u306e\u4eba\u9593\u306f\u3001\u751f\u307e\u308c\u306a\u304c\u3089\u306b\u3057\u3066\u81ea\u7531\u3067\u3042\u308a\u3001\u304b\u3064\u3001\u5c0a\u53b3\u3068\u6a29\u5229\u3068\u306b\u3064\u3044\u3066\u5e73\u7b49\u3067\u3042\u308b\u3002\u4eba\u9593\u306f\u3001\u7406\u6027\u3068\u826f\u5fc3\u3068\u3092\u6388\u3051\u3089\u308c\u3066\u304a\u308a\u3001\u4e92\u3044\u306b\u540c\u80de\u306e\u7cbe\u795e\u3092\u3082\u3063\u3066\u884c\u52d5\u3057\u306a\u3051\u308c\u3070\u306a\u3089\u306a\u3044\u3002", + "metadata": { + "languages": [ + "jpn" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "5e6d21f4f6cf7b92b7fdaecf265580aa", + "text": "Japanese (Osaka)", + "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "11becf872133958b85928710255eb2cc", + "text": "\u3059\u3079\u3066\u306e\u4eba\u9593\u306f\u3001\u751f\u307e\u308c\u306a\u304c\u3089\u306b\u3057\u3066\u81ea\u7531\u3084\u3057\u3001\u304b\u3064\u3001\u5c0a\u53b3\u3068\u6a29\u5229\u3068\u306b\u3064\u3044\u3066\u5e73\u7b49\u3084\u3002\u4eba\u9593\u306f\u3001\u7406\u6027\u3068\u826f\u5fc3\u3068\u3092\u6388\u3051\u3089\u308c\u3066\u304a\u308a\u3001\u4e92\u3044\u306b\u540c\u80de\u306e\u7cbe\u795e\u3092\u3082\u3063\u3066\u884c\u52d5\u3057\u306a\u3002", + "metadata": { + "languages": [ + "jpn" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": 
[ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "673e122c097796c5aa83f02476e37529", + "text": "Japanese (Tokyo)", + "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "491550640c5496ae9b9e41b4c6cc14f0", + "text": "\u5168\u90e8\u306e\u4eba\u9593\u306f\u3001\u751f\u307e\u308c\u306a\u304c\u3089\u306b\u3057\u3066\u81ea\u7531\u3067\u3042\u308a\u3001\u304b\u3064\u3001\u5c0a\u53b3\u3068\u6a29\u5229\u3068 \u306b\u3064\u3044\u3066\u5e73\u7b49\u3067\u3042\u308b\u3002\u4eba\u9593\u306f\u3001\u7406\u6027\u3068\u826f\u5fc3\u3068\u3092\u6388\u3051\u3089\u308c\u3066\u304a\u308a\u3001\u4e92\u3044\u306b\u540c \u80de\u306e\u7cbe\u795e\u3092\u3082\u3063\u3066\u884c\u52d5\u3057\u306a\u3051\u308c\u3070\u306a\u3089\u306a\u3044\u3002", + "metadata": { + "languages": [ + "jpn" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "d1c8d98009aff8c745beed6b2d4c44f3", + "text": "Javanese (Javanese)", + "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "36abfab21253834165ada6ce4b89b5e6", + "text": 
"\ua9cb\ua9b1\ua9a7\ua9bc\ua9a4\ua9c0\ua9b2\ua9b8\ua9ae\ua9ba\ua9b4\ua981\ua98f\ua9ad\ua9b2\ua9b6\ua982\ua9ab\ua98f\ua9ba\ua98f\ua9a4\ua9c0\ua99b\ua9b6\ua9a9\ua982\ua9a2\ua9b6\ua98f\ua9ad\ua9a4\ua9c0\ua9a2\ua982\ua9a7\ua9ba\ua9a9\ua982\ua9a0\ua9a7\ua9a0\ua9c0\ua9ad\ua9a4\ua9c0\ua9b2\ua98f\ua9c0\ua9b2\ua98f\ua9c0\ua98f\ua981\ua9a5\ua99d\ua9c9\u200b\ua98f\ua9a7\ua9ba\ua983\ua9a5\ua9b6\ua9a4\ua9ab\ua9b6\ua981\ua994\ua9a4\ua9c0\ua9b2\ua98f\ua9ad\ua9c0\ua9ad\ua9a4\ua9c0\ua98f\ua9ad\ua9c0\ua9a7\ua9b8\ua9b1\ua982\ua9a0\ua98f\ua9b2\ua997\ua9a7\ua9c0\ua9a5\ua9b1\ua9bf\ua9ae\ua9b8\ua981\ua994\ua9a4\ua9c0\ua9b2\ua981\ua992\ua9ba\ua9b4\ua9a4\ua9c0\ua9a4\ua9ba\ua9a9\ua9bc\ua9a9\ua9b6\ua9a0\ua9bf\ua9a4\ua9c0\ua9b1\ua9b6\ua997\ua9b6\ua9ad\ua9a4\ua9c0\ua9b1\ua9b6\ua997\ua9b6\ua9a4\ua9ba\ua98f\ua9a4\ua9c0\ua99b\ua9b6\ua997\ua9b6\ua9ae\ua9ba\ua9b4\ua9b1\ua9b8\ua9a9\ua9a2\ua9b8\ua9ad\ua9b8\ua982\ua9c9\u200b", + "metadata": { + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "117934bb8b775293442e8ca3921ad1da", + "text": "Javanese (Latin) Saben uwong kalairake kanthi mardika lan darbe martabat lan hak-hak kang padha. Kabeh pinaringan akal lan kalbu sarta kaajab pasrawungan anggone memitran siji lan sijine kanthi jiwo sumadulur.", + "metadata": { + "languages": [ + "ind" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "6b54f0a53f2c7bb4545835a761d4654b", + "text": "Jola-Fonyi Bukanak b\u00farom nan kuwolimi kurere kererer di waafaw b\u00farom. 
Kubabaj poop b\u00fayejet di karampenoor.", + "metadata": { + "languages": [ + "ind", + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "b2c33dfdb2855a8786e1145a6dbbedc2", + "text": "Jula W\u00f3lo\u2019 l\u00e1, h\u00e1damaden\u2019 b\u025b\u025b ye h\u0254r\u0254n ye, b\u025b\u025b k\u00e1 k\u00e1n l\u00e0nbe n\u00ed h\u00e1k\u025byaw l\u00e1. M\u0254g\u0254 b\u025b\u025b ye h\u00e1kilitigi ye, b\u025b\u025b ye h\u00e1kilima ye ; \u00f2 l\u00e0, \u00f9 k\u00e1 k\u00e1n k\u00e0 \u0272g\u0254n m\u00edna n\u00ed b\u00e1denya ye.", + "metadata": { + "languages": [ + "hun", + "vie" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "78522b71b29080a2ab8b60b4f8bdb929", + "text": "K'iche', Central Konojel ri winaq are taq ke'alaxik pa junaman ya'tal chkech kakechab'ej ronojel ri utzil; utz kakib'ano, kakichomaj, kakib'ij jasa je' ri k'o pa kanima, rumal che ri junam kib'antajik. 
Rajawaxik xuqe' kakimulij kib' che utzukuxuk ri loq'ob'al pa we uwachulew.", + "metadata": { + "languages": [ + "ind", + "hrv", + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "03b0bbddb1137224b43b690dfcc5b506", + "text": "Kabardian \u0426\u04cf\u044b\u0445\u0443 \u043f\u0441\u043e\u0440\u0438 \u0449\u0445\u044c\u044d\u0445\u0443\u0438\u0442\u0443, \u044f \u0449\u04cf\u044b\u0445\u044c\u044b\u043c\u0440\u044d \u044f \u0445\u0443\u044d\u0444\u0430\u0449\u044d\u0445\u044d\u043c\u0440\u044d\u043a\u04cf\u044d \u0437\u044d\u0445\u0443\u044d\u0434\u044d\u0443 \u043a\u044a\u0430\u043b\u044a\u0445\u0443\u0440. \u0410\u043a\u044a\u044b\u043b\u0440\u044d \u0437\u044d\u0445\u044d\u0449\u04cf\u044b\u043a\u04cf \u0433\u044a\u0443\u0430\u0437\u044d\u0440\u044d \u044f\u04cf\u044d\u0449\u0438, \u0437\u044b\u0440 \u0437\u044b\u043c \u0437\u044d\u043a\u044a\u0443\u044d\u0448 \u0437\u044d\u0445\u0430\u0449\u0406\u044d \u044f\u043a\u0443 \u0434\u044d\u043b\u044a\u0443 \u0437\u044d\u0445\u0443\u0449\u044b\u0442\u044b\u043d \u0445\u0443\u0435\u0439\u0445\u044d\u0449.", + "metadata": { + "languages": [ + "rus" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "5da5e2f597a0e6fce26a5359c72395b3", + "text": "Kabiy\u00e9 Pal\u028al\u028a\u028a \u025byaaa n\u025b pa-t\u0269 y\u0254\u0254 w\u025b\u028a kpaagbaa n\u025b p\u025bw\u025b\u025b k\u0269ma\u014b wala \u025bs\u0269ndaa. 
Pal\u028al\u028a\u028a-w\u025b n\u025b p\u0254-l\u0254\u014b n\u025b pa-ma\u0263z\u0269m; mb\u028a yekina n\u025b p\u0254s\u0254\u0254l\u0269 \u0256ama se p\u025bk\u025b \u025byaa pa-t\u0269\u014bg\u025b.", + "metadata": { + "languages": [ + "tgl" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "b1298a59ae52d3a285db4b52acce1f32", + "text": "Kabuverdianu Tudo ser humano na \u00eas mundo nac\u00ea libri e igual na s\u00ea dignidade e na s\u00eas dr\u00eato. Na s\u00eas razon e na s\u00eas conc\u00e9n\u00e7a, tudo arguem deb\u00ea porc\u00ead\u00ea pa co tudo guenti na sprito di fraternidadi.", + "metadata": { + "languages": [ + "por" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "d1fe7eed38b94d986fb537125627c4f2", + "text": "Kafa Ubbe ashi bushoo shiijjeto tatoonaa ame megoona aalloon, oogoonaa wuroonon yechiiniye. 
Ikkoo baroona manittine shalligoonaa naboona yeshet shalligoon boono shaddeyoo hakkiimm qello boonoshich ichete.", + "metadata": { + "languages": [ + "som" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "f6f8a776d36f4db6ffdd50e83fee6488", + "text": "Kannada \u0c8e\u0cb2\u0ccd\u0cb2\u0cbe \u0cae\u0cbe\u0ca8\u0cb5\u0cb0\u0cc2 \u0cb8\u0ccd\u0cb5\u0ca4\u0c82\u0ca4\u0ccd\u0cb0\u0cb0\u0cbe\u0c97\u0cbf\u0caf\u0cc7 \u0c9c\u0ca8\u0cbf\u0cb8\u0cbf\u0ca6\u0ccd\u0ca6\u0cbe\u0cb0\u0cc6. \u0cb9\u0cbe\u0c97\u0cc2 \u0c98\u0ca8\u0ca4\u0cc6 \u0cae\u0ca4\u0ccd\u0ca4\u0cc1 \u0cb9\u0c95\u0ccd\u0c95\u0cc1\u0c97\u0cb3\u0cb2\u0ccd\u0cb2\u0cbf \u0cb8\u0cae\u0cbe\u0ca8\u0cb0\u0cbe\u0c97\u0cbf\u0ca6\u0ccd\u0ca6\u0cbe\u0cb0\u0cc6. \u0cb5\u0cbf\u0cb5\u0cc7\u0c95 \u0cae\u0ca4\u0ccd\u0ca4\u0cc1 \u0c85\u0c82\u0ca4\u0c83\u0c95\u0cb0\u0ca3\u0c97\u0cb3\u0ca8\u0ccd\u0ca8\u0cc1 \u0caa\u0ca1\u0cc6\u0ca6\u0cb5\u0cb0\u0cbe\u0ca6\u0ccd\u0ca6\u0cb0\u0cbf\u0c82\u0ca6 \u0c85\u0cb5\u0cb0\u0cc1 \u0caa\u0cb0\u0cb8\u0ccd\u0caa\u0cb0 \u0cb8\u0cb9\u0ccb\u0ca6\u0cb0 \u0cad\u0cbe\u0cb5\u0ca6\u0cbf\u0c82\u0ca6 \u0cb5\u0cb0\u0ccd\u0ca4\u0cbf\u0cb8\u0cac\u0cc7\u0c95\u0cc1.", + "metadata": { + "languages": [ + "kan" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "2600735e35ce8a6dc8243d2269bbeee5", + "text": "Kanuri, Central Adamgana woso kambe katambo ye daraja-a hakkiwa-ason kalkalye. 
Hankal-a nazaru-asoro k\u0259z\u0259pk\u0259 ye suro hal n\u0259mharamiben kamazasoga letaiyin ye.", + "metadata": { + "languages": [ + "swa", + "ind", + "som", + "hun" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "a841ec547609322347b08be60cc1c722", + "text": "Kaonde Bonse bantu basemwa bakasuluka kabiji baesakena pamo mubuneme. Baji na maana a kulanguluka kabiji bobila bantu bakwabo byubilo bakwibasekesha.", + "metadata": { + "languages": [ + "swa", + "ind", + "tgl" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "2e5fe352907c2d71abf3a0283032775f", + "text": "Kaqchikel, Central Konojel ri winaqi' kan kalaxib'en pe ri kolotaj\u00efk, ri junan kiq'ij, ri junan kejqalen, junan kich'ojib'al pa kik'aslen, xa achi'el k'a ri kik'ojlen, ri kinojib'al kichajin xa tik'amun k'a chi nimal\u00e4j konojel xtikajo' ki'.", + "metadata": { + "languages": [ + "slv", + "hrv", + "est" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "23d27d0652af0739dbaa674e88fc9ae4", + "text": "Karakalpak \u04b2\u04d9\u043c\u043c\u0435 \u0430\u0434\u0430\u043c\u043b\u0430\u0440 \u04e9\u0437 \u049b\u04d9\u0434\u0438\u0440-\u049b\u044b\u043c\u0431\u0430\u0442\u044b \u0436\u04d9\u043d\u0435 
\u04b3\u0443\u049b\u044b\u049b\u043b\u0430\u0440\u044b\u043d\u0434\u0430 \u0435\u0440\u043a\u0438\u043d \u04b3\u04d9\u043c \u0442\u0435\u04a3 \u0431\u043e\u043b\u044b\u043f \u0442\u0443\u045e\u044b\u043b\u0430\u0434\u044b. \u041e\u043b\u0430\u0440\u0493\u0430 \u0430\u049b\u044b\u043b \u04b3\u04d9\u043c \u04b3\u04af\u0436\u0434\u0430\u043d \u0431\u0435\u0440\u0438\u043b\u0433\u0435\u043d \u0431\u043e\u043b\u044b\u043f, \u0431\u0438\u0440-\u0431\u0438\u0440\u0438\u043d\u0435 \u0442\u0443\u045e\u044b\u0441\u049b\u0430\u043d\u043b\u044b\u049b \u0440\u0443\u045e\u0445\u044b\u043d\u0434\u0430\u0493\u044b \u049b\u0430\u0442\u043d\u0430\u0441\u0442\u0430 \u0431\u043e\u043b\u044b\u045e\u044b \u0442\u0438\u0439\u0438\u0441.", + "metadata": { + "languages": [ + "rus" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "c6f580433e84639a19b178da5dc4b3a2", + "text": "Karelian Kai rahvas roittahes v\u00e4llinny da taza-arvozinnu omas arvos da oigevuksis. Jogahizele heis on annettu mieli da omatundo da heil v\u00e4lt\u00e4m\u00e4tt\u00e4h pid\u00e4y olla kesken\u00e4h, kui vellil.", + "metadata": { + "languages": [ + "est", + "fin" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "87e368f61c4a1ba6e0a5743d4d2d41b2", + "text": "Kasem Ba loge n\u0254\u0254na maama se ba taa ye bedwe mo ba \u014bwea de ba chega seini, ye fefeo teira k\u0254taa. 
W\u025b p\u025b ba swa de bobo\u014ba mo se ba taa ye nubiu daane ye ba jege da \u014bwa\u014ba.", + "metadata": { + "languages": [ + "som", + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "1908a740d8aedadb521f39432a6cbed8", + "text": "Kazakh \u0411\u0430\u0440\u043b\u044b\u049b \u0430\u0434\u0430\u043c\u0434\u0430\u0440 \u0442\u0443\u043c\u044b\u0441\u044b\u043d\u0430\u043d \u0430\u0437\u0430\u0442 \u0436\u04d9\u043d\u0435 \u049b\u0430\u0434\u0456\u0440\u2010\u049b\u0430\u0441\u0438\u0435\u0442\u0456 \u043c\u0435\u043d \u043a\u04b1\u049b\u044b\u049b\u0442\u0430\u0440\u044b \u0442\u0435\u04a3 \u0431\u043e\u043b\u044b\u043f \u0434\u04af\u043d\u0438\u0435\u0433\u0435 \u043a\u0435\u043b\u0435\u0434\u0456. \u0410\u0434\u0430\u043c\u0434\u0430\u0440\u0493\u0430 \u0430\u049b\u044b\u043b\u2010\u043f\u0430\u0440\u0430\u0441\u0430\u0442, \u0430\u0440\u2010\u043e\u0436\u0434\u0430\u043d \u0431\u0435\u0440\u0456\u043b\u0433\u0435\u043d, \u0441\u043e\u043d\u0434\u044b\u049b\u0442\u0430\u043d \u043e\u043b\u0430\u0440 \u0431\u0456\u0440\u2010\u0431\u0456\u0440\u0456\u043c\u0435\u043d \u0442\u0443\u044b\u0441\u0442\u044b\u049b, \u0431\u0430\u0443\u044b\u0440\u043c\u0430\u043b\u0434\u044b\u049b \u049b\u0430\u0440\u044b\u043c\u2010\u049b\u0430\u0442\u044b\u043d\u0430\u0441 \u0436\u0430\u0441\u0430\u0443\u043b\u0430\u0440\u044b \u0442\u0438\u0456\u0441.", + "metadata": { + "languages": [ + "ukr", + "rus" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": 
"75b6a6751bcdf3ddfc1745d8e7118815", + "text": "Khakas \u041f\u043e\u043b\u0493\u0430\u043d \u043d\u0430 \u043a\u0456\u0437\u0456 \u043f\u043e\u0441 \u043f\u0430\u0437\u0430 \u0442\u0438\u04a3 \u0442\u04e7\u0440\u0456\u043f\u0447\u0435 \u043f\u0430\u0437\u0430 \u0442\u0438\u04a3 \u043f\u043e\u0441\u0442\u044b\u04a3 \u0441\u0438\u043d\u0456\u043d \u043f\u0456\u043b\u0456\u043d\u0433\u0435\u043d\u0456\u043d \u043f\u0430\u0437\u0430 \u0442\u04e7\u0440\u0435\u043b\u0435\u0440\u0456\u043d\u0456\u04a3\u0434\u0435 \u043f\u043e\u043b\u0447\u0430. \u041e\u043b\u0430\u0440\u0434\u044b\u04a3 \u0441\u0430\u0493\u044b\u043d\u0493\u0430\u043d\u044b \u043f\u0430\u0437\u0430 \u0430\u0440\u044b\u0493 \u0441\u0430\u0493\u044b\u0441 \u043f\u0430\u0440 \u043f\u0430\u0437\u0430 \u0445\u0430\u0440\u044b\u043d\u0434\u0430\u0441\u0442\u0430\u0440 \u0447\u0456\u043b\u0438 \u0442\u0443\u0434\u044b\u043d\u0430\u0440\u0493\u0430 \u043a\u0438\u0440\u0435\u043a\u0442\u0435\u0440.", + "metadata": { + "languages": [ + "ukr", + "rus" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "74a93facd90bf0553bdf368698baa2a5", + "text": "Khasi \u00cfa ki bynriew baroh la kha laitluid bad ki \u00efaryngkat ha ka burom bad ki hok. 
Ha ki la bsiap da ka bor pyrkhat bad ka jing\u00efatiplem bad ha ka mynsiem jingsngew shipara ki dei ban \u00efatrei bynrap lang.", + "metadata": { + "languages": [ + "ind", + "som" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "b6ab4d5f0569e217cd985de6b9f5ca73", + "text": "Khmer, Central \u1798\u1793\u17bb\u179f\u17d2\u179f\u1791\u17b6\u17c6\u1784\u17a2\u179f\u17cb \u1780\u17be\u178f\u1798\u1780\u1798\u17b6\u1793\u179f\u17c1\u179a\u17b8\u1797\u17b6\u1796 \u1793\u17b7\u1784\u179f\u1798\u1797\u17b6\u1796 \u1780\u17d2\u1793\u17bb\u1784\u1795\u17d2\u1793\u17c2\u1780\u179f\u17c1\u1785\u1780\u17d2\u178a\u17b8\u1790\u17d2\u179b\u17c3\u1790\u17d2\u1793\u17bc\u179a\u1793\u17b7\u1784\u179f\u17b7\u1791\u17d2\u1792\u17b7\u17d4 \u1798\u1793\u17bb\u179f\u17d2\u179f \u1798\u17b6\u1793\u179c\u17b7\u1785\u17b6\u179a\u178e\u1789\u17d2\u1789\u17b6\u178e\u1793\u17b7\u1784\u179f\u178f\u17b7\u179f\u1798\u17d2\u1794\u1787\u1789\u17d2\u1789\u17c8\u1787\u17b6\u1794\u17cb\u1796\u17b8\u1780\u17c6\u178e\u17be\u178f \u17a0\u17be\u1799\u1782\u1794\u17d2\u1794\u17b8\u1794\u17d2\u179a\u1796\u17d2\u179a\u17b9\u178f\u17d2\u178a\u1785\u17c6\u1796\u17c4\u17c7\u1782\u17d2\u1793\u17b6\u1791\u17c5\u179c\u17b7\u1789\u1791\u17c5\u1798\u1780\u1780\u17d2\u1793\u17bb\u1784\u179f\u17d2\u1798\u17b6\u179a\u178f\u17b8\u1797\u17b6\u178f\u179a\u1797\u17b6\u1796\u1787\u17b6\u1794\u1784\u1794\u17d2\u17a2\u17bc\u1793\u17d4", + "metadata": { + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "841467ed91005c2b65ccce68e9bac719", + "text": 
"Kh\u00fcn \u1a3e\u1a36\u1a69\u1a54\u1a7c\u1a34\u1a60\u1a26\u1a62\u1a49\u1a56\u1a63\u1a60\u1a3f\u1a20\u1a6e\u1a60\u1a2f\u1a68\u1a3e\u1a63\u1a3e\u1a66\u1a3b\u1a60\u1a26\u1a48\u1a41\u1a53\u1a62\u1a39\u1a60\u1a3f\u1a75\u1a26\u1a3b\u1a60\u1a3f\u1a26\u1a20\u1a60\u1a36\u1a62 \u1a36\u1a71\u1a20\u1a65\u1a32\u1a60\u1a32\u1a65\u1a48\u1a60\u1a20\u1a62 \u1a53\u1a62\u1a48\u1a65\u1a34\u1a60\u1a35\u1a65 \u1a32\u1a75\u1a63\u1a60\u1a26\u1a23\u1a73\u1a76\u1a23\u1a62\u1a3e\u1a66\u1a3e\u1a36\u1a6e\u1a63\u1a35\u1a3e\u1a60\u1a3e\u1a7c\u1a53\u1a62 \u1a23\u1a60\u1a45\u1a41\u1a37\u1a2d\u1a65\u1a37\u1a60\u1a32\u1a62\u1a32\u1a73\u1a75\u1a20\u1a60\u1a36\u1a62\u1a2f\u1a62\u1a60\u1a45\u1a60\u1a3f\u1a23\u1a60\u1a45\u1a63\u1a60\u1a3e\u1a39\u1a60\u1a3f\u1a75\u1a26\u1a3b\u1a60\u1a3f\u1a26\u1a20\u1a60\u1a36\u1a62", + "metadata": { + "languages": [ + "tur" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "7abc18c11be0eb0d9f9526fbe76af972", + "text": "Kirghiz \u0411\u0430\u0440\u0434\u044b\u043a \u0430\u0434\u0430\u043c\u0434\u0430\u0440 \u04e9\u0437 \u0431\u0435\u0434\u0435\u043b\u0438\u043d\u0434\u0435 \u0436\u0430\u043d\u0430 \u0443\u043a\u0443\u043a\u0442\u0430\u0440\u044b\u043d\u0434\u0430 \u044d\u0440\u043a\u0438\u043d \u0436\u0430\u043d\u0430 \u0442\u0435\u04a3 \u0443\u043a\u0443\u043a\u0442\u0443\u0443 \u0431\u043e\u043b\u0443\u043f \u0436\u0430\u0440\u0430\u043b\u0430\u0442. 
\u0410\u043b\u0430\u0440\u0434\u044b\u043d \u0430\u04a3\u2010\u0441\u0435\u0437\u0438\u043c\u0438 \u043c\u0435\u043d\u0435\u043d \u0430\u0431\u0438\u0439\u0438\u0440\u0438 \u0431\u0430\u0440 \u0436\u0430\u043d\u0430 \u0431\u0438\u0440\u0438\u2010\u0431\u0438\u0440\u0438\u043d\u0435 \u0431\u0438\u0440 \u0442\u0443\u0443\u0433\u0430\u043d\u0434\u044b\u043a \u043c\u0430\u043c\u0438\u043b\u0435\u043a\u044b\u043b\u0443\u0443\u0433\u0430 \u0442\u0438\u0439\u0438\u0448.", + "metadata": { + "languages": [ + "rus", + "mkd" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "2490211a751af08c831f437250d70884", + "text": "Kissi, Northern wanda tu cio M\u025b pil\u0254\u0254 o wol\u0254\u0254 ni, le waa o ba nd\u0254\u0254 cio, o b\u025b\u025blen kenando ni, o t\u0254ngdo ni, b\u025btu n\u0254n yiyando a kullo, o kon ni naan tu dua mim maalyan kalapil\u0254y\u025byi ni.", + "metadata": { + "languages": [ + "tgl", + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "acff30c65cc8bd28c010b97e9b255653", + "text": "Kituba Bantu nyonso, na mbutukulu kevwandaka na kimpwanza ya bawu, ngenda mpe baluve ya mutindu mosi. 
Mayela na mbanzulu je na bawu, ni yawu yina bafwana kusalasana na bumpangi.", + "metadata": { + "languages": [ + "swa", + "ind" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "89b560fcf7e82a3a650bee70ceeb01ac", + "text": "Kituba (2) Bantu nyonso ntangu bawu ke butukaka, bawu ke vwandaka na kimpwanza, ya kele mutindu mosi mpe na yina me tadila buzitu ya nzutu mpe baluve ya bawu. Bawu kele na mayindu mpe na bumuntu. Mpe nyonso yina bawu fwana kusala na sika ya bantu ya nkaka, bawu fwana kusala yawu na mpeve ya kimpangui.", + "metadata": { + "languages": [ + "swa", + "tgl", + "ind" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "3da488a598903b0fa6a89a4d9b704219", + "text": "Komi-Permyak \u0411\u044b\u0434\u04e7\u0441 \u043e\u0442\u0438\u0440\u044b\u0441 \u0447\u0443\u0436\u04e7\u043d\u044b \u0432\u043e\u043b\u044c\u043d\u04e7\u0439\u0435\u0437\u04e7\u043d \u0434\u0430 \u04e7\u0442\u043a\u043e\u0434\u0434\u0435\u0437\u04e7\u043d \u0434\u043e\u0441\u0442\u043e\u0438\u043d\u0441\u0442\u0432\u043e\u044b\u043d \u0434\u0430 \u043f\u0440\u0430\u0432\u043e\u044d\u0437\u044b\u043d. 
\u041d\u044b\u043b\u04e7 \u0441\u0435\u0442\u04e7\u043c \u043c\u044b\u0432\u043a\u044b\u0434 \u0434\u0430 \u0441\u043e\u0432\u0435\u0441\u0442\u044c \u043e\u0432\u043d\u044b \u04e7\u0442\u0430\u043c\u04e7\u0434\u043d\u044b\u0441\u043a\u04e7\u0442 \u043a\u044b\u0434\u0437 \u0432\u043e\u043d\u043d\u044d\u0437\u043b\u04e7.", + "metadata": { + "languages": [ + "rus" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "b613757216cf998e48abdf457b38e8e5", + "text": "Konjo Abandu omububuthiranwa bakabuthawa ibanawithe obuthoki nobuholho obulingirirene, mobahangikwa ibanawithe amenge, neryo ibakathoka erighabania abathya ekibuya nekisandire. Nokweryo buli muyima atholere eryanza munyikiwe ngababuthenwe.", + "metadata": { + "languages": [ + "swa", + "tgl" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "8957157e481718ba32250db7a8d7c310", + "text": "Koongo Bantu nyonso, na mbutukulu kevwandaka na kimpwanza ya bawu, ngenda mpe baluve ya mutindu mosi. 
Mayela na mbanzulu je na bawu, ni yawu yina bafwana kusalasana na bumpangi.", + "metadata": { + "languages": [ + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "b3972bc2704b772167f5cbc75e4d4660", + "text": "Koongo (Angola) Bizingi bioso bisiwu ti batu bambutukanga mu kidedi ki buzitu ayi kibumswa. Bizingi-bene, batu, badi diela ayi tsi-ntima, bafwene kuzingila mbatzi-na-mbatzi-yandi mu mtima bukhomba.", + "metadata": { + "languages": [ + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "71cc3fa5f30f347d8e225e871139661f", + "text": "Korean \ubaa8\ub4e0 \uc778\uac04\uc740 \ud0dc\uc5b4\ub0a0 \ub54c\ubd80\ud130 \uc790\uc720\ub85c\uc6b0\uba70 \uadf8 \uc874\uc5c4\uacfc \uad8c\ub9ac\uc5d0 \uc788\uc5b4 \ub3d9\ub4f1\ud558\ub2e4. 
\uc778\uac04\uc740 \ucc9c\ubd80\uc801\uc73c\ub85c \uc774\uc131\uacfc \uc591\uc2ec\uc744 \ubd80\uc5ec\ubc1b\uc558\uc73c\uba70 \uc11c\ub85c \ud615\uc81c\uc560\uc758 \uc815\uc2e0\uc73c\ub85c \ud589\ub3d9\ud558\uc5ec\uc57c \ud55c\ub2e4.", + "metadata": { + "languages": [ + "kor" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "ec837c06df9c110c22e734be4704e763", + "text": "Kpelle, Guinea Nukan gele kaa p\u0259l\u0259 kaa tan\u0254n, yili\u0253a nu k\u0259le maawiy\u0259 p\u0259l\u0259 da t\u0254\u0254i gaa \u0272ei y\u025bn\u025byii hu k\u025bp\u0259l\u0259 kaal\u0254 tan\u0254n; di k\u025bm\u025bni a nukan \u014baa \u0253\u0259 g\u025b\u025b hw\u0259k\u025bli w\u025blik\u025bmaa \u0259 l\u0254 di luwai.", + "metadata": { + "languages": [ + "som", + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "6322dea6cfe74f4e5e0272752dccffb4", + "text": "Krio \u025bvrib\u0254di b\u0254n fri \u025bn g\u025bt in yon rayt, n\u0254n wan n\u0254 pas in k\u0254mpin. 
Wi \u0254l ebul f\u0254 tink \u025bn f\u025bn\u0254t wetin rayt \u025bn r\u0254\u014b pantap dat wi f\u0254 sabi aw f\u0254 liv l\u025bk wan big famili.", + "metadata": { + "languages": [ + "ind", + "eng", + "tgl" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "e4653071cb4a8a4f59ca7f62a50afbb4", + "text": "Kulango, Bouna Igooyoo p\u025b\u025b h\u028bn taa. B\u0254 p\u025b\u025b jabaga b\u0254r\u0254. H\u0254 ya g\u028b\u028bn\u2019n b\u0254\u0254 h\u025b p\u025b\u025b, h\u0254 h\u025b gus\u025bg\u025b\u2019n.", + "metadata": { + "languages": [ + "tgl", + "som" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "df4b88e2493c88f7b478eaece77dfdb7", + "text": "Kurdish, Central Hem\u00fb mirov azad \u00fb di weqar \u00fb mafan de wekhev t\u00ean dinyay\u00ea. Ew xwed\u00ee hi\u015f \u00fb \u015fu\u00fbr in \u00fb div\u00ea li hember hev bi zihniyeteke bratiy\u00ea bilivin.", + "metadata": { + "languages": [ + "tur", + "nld", + "eng" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "26a7611f793432bd8ce6f6cb35470ad5", + "text": "Kurdish, Northern Hem\u00fb mirov azad \u00fb di weqar \u00fb mafan de wekhev t\u00ean dinyay\u00ea. 
Ew xwed\u00ee hi\u015f \u00fb \u015fu\u00fbr in \u00fb div\u00ea li hember hev bi zihniyeteke bratiy\u00ea bilivin.", + "metadata": { + "languages": [ + "nld", + "tur", + "eng", + "afr" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "0eaf9123417f2794584c7cfd20e10aee", + "text": "Ladin D\u00f6tes les porsones nasc l\u00ebdies y cun la medema dignit\u00e9 y i medemi d\u00ebr\u0107. Ares \u00e0 na rajun y na cosci\u00ebnza y m\u00ebss s\u2019incunt\u00e8 \u00f6na cun l\u2019atra te n spirit de fraternit\u00e9.", + "metadata": { + "languages": [ + "spa", + "cat" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "0e27738675f5136ec763f36ff9bb0ae2", + "text": "Ladino Todos los umanos nasen libres i iguales en dinyidad i derechos i, komo estan ekipados de razon i konsensia, deven komportarsen kon ermandad los unos kon los otros.", + "metadata": { + "languages": [ + "spa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "5590b8f08d34a13d98afa307c3a0db0a", + "text": "Lamnso' \u00c1 dz\u0259\u0300\u0259\u0301 wir dz\u0259\u0300m r\u00e9\u014br\u00e9\u014b f\u00f3 ghv\u0259m w\u00f9n \u00e0 f\u00f3 gh\u00e0y, \u00e1 yo\u2019 dz\u0259\u0300\u0259\u0301 wir ms\u00f2\u014b ji kw\u00e0n. 
W\u00ecr dz\u0259\u0300m k\u0300m k f\u00f3mo woo f\u00f3 kw\u00e0\u2019t\u00ec w\u00f9n \u00e0 f\u00f3 vifii, a w\u00f9 k\u00e9r f\u00f3 a yi\u00ec e w\u00f9m\u00f2\u2019 woo w\u00edr moo f\u0259\u0301r v\u0259.", + "metadata": { + "languages": [ + "vie", + "pol", + "som" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "ae451bf94c5e07470540741833822372", + "text": "Lao \u0ea1\u0eb0\u0e99\u0eb8\u0e94\u0ec0\u0e81\u0eb5\u0e94\u0ea1\u0eb2\u0ea1\u0eb5\u0eaa\u0eb4\u0e94\u0ec0\u0eaa\u0ea5\u0eb5\u0e9e\u0eb2\u0e9a \u0ec1\u0ea5\u0eb0 \u0eaa\u0eb0\u0ec0\u0edd\u0eb5\u0edc\u0ec9\u0eb2\u0e81\u0eb1\u0e99\u0ec3\u0e99\u0e97\u0eb2\u0e87\u0e81\u0ebd\u0e94\u0e95\u0eb4\u0eaa\u0eb1\u0e81 \u0ec1\u0ea5\u0eb0 \u0e97\u0eb2\u0e87\u0eaa\u0eb4\u0e94\u0e94\u0ec9\u0ea7\u0e8d\u0ea1\u0eb0\u0e99\u0eb8\u0e94\u0ea1\u0eb5\u0eaa\u0eb0\u0e95\u0eb4\u0eaa\u0eb3\u0e9b\u0eb1\u0e94\u0e8a\u0eb1\u0e99\u0e8d\u0eb0(\u0eae\u0eb9\u0ec9\u0e94\u0eb5\u0eae\u0eb9\u0ec9\u0e8a\u0ebb\u0ec8\u0ea7)\u0ec1\u0ea5\u0eb0\u0ea1\u0eb5\u0ea1\u0eb0\u0ec2\u0e99\u0e97\u0eb3\u0e88\u0eb7\u0ec8\u0e87\u0e95\u0ec9\u0ead\u0e87\u0e9b\u0eb0\u0e9e\u0eb6\u0e94\u0e95\u0ebb\u0e99\u0e95\u0ecd\u0ec8\u0e81\u0eb1\u0e99\u0ec3\u0e99\u0e97\u0eb2\u0e87\u0e9e\u0eb5\u0ec8\u0e99\u0ec9\u0ead\u0e87.", + "metadata": { + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "0ed168b2d2709781e3bd28875a311e5c", + "text": "Latin Omnes homines dignitate et iure liberi et pares nascuntur, rationis et conscientiae participes sunt, quibus inter se concordiae studio est agendum.", + 
"metadata": { + "languages": [ + "fra" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "390fa005137d580229352d11d7af483d", + "text": "Latin (1) Omnes homines liberi aequique dignitate atque juribus nascuntur. Ratione conscientiaque praediti sunt et alii erga alios cum fraternitate se gerere debent.", + "metadata": { + "languages": [ + "fra", + "cat", + "ron" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "6cddab55572e83cd679bab750a745b46", + "text": "Latvian Visi cilv\u0113ki piedzimst br\u012bvi un vienl\u012bdz\u012bgi sav\u0101 pa\u0161cie\u0146\u0101 un ties\u012bb\u0101s. Vi\u0146i ir apvelt\u012bti ar sapr\u0101tu un sirdsapzi\u0146u, un vi\u0146iem j\u0101izturas citam pret citu br\u0101l\u012bbas gar\u0101.", + "metadata": { + "languages": [ + "lav" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "84c7cce831ebebafd545d3767089bc8f", + "text": "Latvian (2) Visi cilv\u0113ki piedzimst br\u012bvi un vienl\u012bdz\u012bgi cie\u0146\u0101 un ties\u012bb\u0101s. 
Vi\u0146iem ir dots sapr\u0101ts un sirdsapzi\u0146a, un vi\u0146iem citam pret citu j\u0101izturas br\u0101l\u012bbas gar\u0101.", + "metadata": { + "languages": [ + "lav" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "c431b1dcba75dca04cdeaaa5388f19c0", + "text": "Ligurian Tutte e personn-e nascian libere e p\u00e6ge in dignit\u00e6 e driti. Son dot\u00e6 de raxon e coscensa e gh\u2019an da ag\u00ee l\u2019unn-a verso l\u2019atra inte \u2019n spirito de fradelansa.", + "metadata": { + "languages": [ + "ita" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "693ef7caa32675b109893e37846d9f13", + "text": "Limba, West-Central Biya-m\u025bti fooma be kiyo ka kuyanka\u014b i\u014b kas\u025bmb\u025b m\u025bn\u025b in ka yiki. Bind\u025b ki\u014b ba niy\u0254 in masim\u0254k\u0254, maka yiina wo ka hu w\u025bndi yande.", + "metadata": { + "languages": [ + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "d2f3db4ece1ba0a2826440f4e392a66d", + "text": "Lingala Bato nyonso na mbotama bazali nzomi pe bakokani na limemya pe makoki. 
Bazali na mayele pe basengeli kofanda na bondeko okati na bango.", + "metadata": { + "languages": [ + "swa", + "tgl" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "6fcb989c6e738221bc467859b15c2d51", + "text": "Lingala (tones) Bato ny\u0254\u0301ns\u0254 na mbo\u0301tama bazali\u0301 ns\u0254\u0301mi\u0301 mpe\u0301 bako\u0301ka\u0301ni\u0301 na lim\u025bmya mpe\u0301 makoki\u0301. Bazali\u0301 na may\u025b\u0301l\u025b mpe\u0301 basenge\u0301li\u0301 kova\u0301nda na bondeko o ka\u0301ti na bango\u0301.", + "metadata": { + "languages": [ + "tgl", + "ces", + "hun" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "353adb6fb432616b715be3966a6d79bd", + "text": "Lithuanian Visi \u017emon\u0117s gimsta laisvi ir lyg\u016bs savo orumu ir teis\u0117mis. Jiems suteiktas protas ir s\u0105\u017ein\u0117 ir jie turi elgtis vienas kito at\u017evilgiu kaip broliai.", + "metadata": { + "languages": [ + "lit" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "3e4f829a968d5f615b4245e85dc21d08", + "text": "Lobi Teehuu s\u028bn\u0254 n ther \u025b\u025b n\u0269\u0269 b\u028bn\u0254 wa n do deea\u0294 s\u0269 w\u028b n makha sam\u0269n\u0269 na n\u00e0 h\u028b t\u0269n\u025bpar r\u00e0. 
Thangba ti y\u025br \u00e0 p\u025b y\u025br j\u0269\u0269r n\u00e0 f\u0269lw\u025b s\u0269 a teena waan f\u028bkha omkhaa.", + "metadata": { + "languages": [ + "som" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "7c7f50be4d274486c143858905c69e06", + "text": "Lozi Batu kaufela ba pepilwe inge ba lukuluhile ni liswanelo ze swana. Ba ba ni swanelo ya ku nahana mi ba swanela ku ba ni likezo za buzwale ku mutu yo mung'wi.", + "metadata": { + "languages": [ + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "6197bb27429c967b218d90ce1ccd2a0c", + "text": "Luba-Kasai Bantu bonsu badi baledibwa badikadile ne badi ne makokeshi amwe. Badi ne lungenyi lwa bumuntu ne kondo ka moyo, badi ne bwa kwenzelangana malu mu buwetu.", + "metadata": { + "languages": [ + "swa", + "ind" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "bc4bb086412d3334ab1dee422ea2cb3c", + "text": "Lunda Muntu wejima wasemuka walukbuka wesekana hamu ni akwawu mukumulemesha. 
Wenkewa kutong'ojoka nikuzatila hamu nimukwawu muntu muwunta'a.", + "metadata": { + "languages": [ + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "872908665791636f2ec3f0477922984f", + "text": "Luvale Vatu vosena vasemuka yapwa hohamwe nakweseka mukuyoya chavo. Vatwama nachiyoyelo chalusesa chajingolo chakuzanga kulivwashana muchiyoyelo chavo.", + "metadata": { + "languages": [ + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "3f8cca735e9bb8ee68adff123b7ebdda", + "text": "Luxembourgeois All M\u00ebnsch k\u00ebnnt fr\u00e4i a mat deer selwechter Dignit\u00e9it an dene selwechte Rechter op d'Welt. 
Jiddereen huet s\u00e4i Verstand a s\u00e4i Gew\u00ebsse krut an soll an engem Geescht vu Bridderlechkeet denen anere g\u00e9intiwwer handelen.", + "metadata": { + "languages": [ + "nld", + "deu" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "1a2cc3d892dc79a4b68cc59db7a69ea1", + "text": "Macedonian \u0421\u0438\u0442\u0435 \u0447\u043e\u0432\u0435\u0447\u043a\u0438 \u0441\u0443\u0448\u0442\u0435\u0441\u0442\u0432\u0430 \u0441\u0435 \u0440\u0430\u0453\u0430\u0430\u0442 \u0441\u043b\u043e\u0431\u043e\u0434\u043d\u0438 \u0438 \u0435\u0434\u043d\u0430\u043a\u0432\u0438 \u043f\u043e \u0434\u043e\u0441\u0442\u043e\u0438\u043d\u0441\u0442\u0432\u043e \u0438 \u043f\u0440\u0430\u0432\u0430. \u0422\u0438\u0435 \u0441\u0435 \u043e\u0431\u0434\u0430\u0440\u0435\u043d\u0438 \u0441\u043e \u0440\u0430\u0437\u0443\u043c \u0438 \u0441\u043e\u0432\u0435\u0441\u0442 \u0438 \u0442\u0440\u0435\u0431\u0430 \u0434\u0430 \u0441\u0435 \u043e\u0434\u043d\u0435\u0441\u0443\u0432\u0430\u0430\u0442 \u0435\u0434\u0435\u043d \u043a\u043e\u043d \u0434\u0440\u0443\u0433 \u0432\u043e \u0434\u0443\u0445\u043e\u0442 \u043d\u0430 \u043e\u043f\u0448\u0442\u043e \u0447\u043e\u0432\u0435\u0447\u043a\u0430\u0442\u0430 \u043f\u0440\u0438\u043f\u0430\u0434\u043d\u043e\u0441\u0442.", + "metadata": { + "languages": [ + "mkd" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "ce6f9e17e88d78727c8e1483fb614015", + "text": "Madura Sadajana oreng lahir mardika e sarenge drajat klaban hak-hak se dha-padha. 
Sadajana eparenge akal sareng nurani ban kodu areng-sareng akanca kadi taretan.", + "metadata": { + "languages": [ + "ind" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "UncategorizedText", + "element_id": "2e4fdb7fcd2748cce07840226331c829", + "text": "Magahi \u0938\u092c \u0932\u094b\u0917 \u0906\u091c\u093e\u0926\u0947 \u091c\u0928\u094d\u092e \u0932\u0947\u092c \u0939\u0908 \u0924\u0925\u093e \u0938\u092c \u0915\u0947 \u092c\u0930\u093e\u092c\u0930\u0947 \u0938\u092e\u094d\u092e\u093e\u0928 \u0914\u0930 \u0905\u0927\u093f\u0915\u093e\u0930 \u0939\u0907\u0964 \u0939\u0941\u0928\u0916\u094b \u0915\u0947 \u092a\u093e\u0938 \u0938\u092e\u091d-\u092c\u0942\u091d \u0914\u0930 \u0905\u0902\u0924:\u0915\u0930\u0923 \u0915\u0947 \u0906\u0935\u093e\u091c \u0939\u094b\u092c \u0939\u0908\u0964 \u0914\u0930 \u0939\u0941\u0928\u0915\u093e \u0926\u094b\u0938\u0930\u094b \u0915\u0947 \u0938\u093e\u0925 \u092d\u093e\u0908\u091a\u093e\u0930\u093e \u0915\u0947 \u0935\u094d\u092f\u0935\u0939\u093e\u0930 \u0915\u0930\u0947 \u092a\u0921\u093c \u0939\u0908\u0964", + "metadata": { + "languages": [ + "hin" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "UncategorizedText", + "element_id": "d691df62a8af33ae0b9c152a092e32a9", + "text": "Maithili \u0938\u092d \u092e\u093e\u0928\u0935 \u091c\u0928\u094d\u092e\u0924\u0903 \u0938\u094d\u0935\u0924\u0928\u094d\u0924\u094d\u0930 \u0905\u091b\u093f \u0924\u0925\u093e \u0917\u0930\u093f\u092e\u093e \u0906\u02bc \u0905\u0927\u093f\u0915\u093e\u0930\u092e\u0947 \u0938\u092e\u093e\u0928 \u0905\u091b\u093f\u0964 
\u0938\u092d\u0915\u0947\u0901 \u0905\u092a\u0928\u2013\u0905\u092a\u0928 \u092c\u0941\u0926\u094d\u0927\u093f \u0906\u02bc \u0935\u093f\u0935\u0947\u0915 \u091b\u0948\u0915 \u0906\u0913\u0930 \u0938\u092d\u0915\u0947\u0901 \u090f\u0915 \u0926\u094b\u0938\u0930\u093e\u0915 \u092a\u094d\u0930\u0924\u093f \u0938\u094c\u0939\u093e\u0930\u094d\u0926\u092a\u0942\u0930\u094d\u0923 \u0935\u094d\u092f\u0935\u0939\u093e\u0930 \u0915\u0930\u092c\u093e\u0915 \u091a\u093e\u0939\u0940\u0964", + "metadata": { + "languages": [ + "hin", + "nep" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "d73cc566475e568433ff76c1fb6af485", + "text": "Makhuwa Atthu othene aniyaria oolikana ni owilamula moota ontthunaya okhala, variyari v\u2019edignidade ni edireito. Akhalanne esaria ni otthokelela, ahaana akhalasaka othene saya vamurettele.", + "metadata": { + "languages": [ + "swa", + "tgl", + "ind" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "166af43c7950017574b550ca090a6ff8", + "text": "Makonde Vanu vohevohe vaidile n\u2019chilambo valendene. Vanijaliwa ulimala vene. 
Pavele vanu pave na ulongo.", + "metadata": { + "languages": [ + "est", + "hrv" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "b672ca63908de1ff358d10ef96fd3d81", + "text": "Malagasy, Plateau Teraka afaka sy mitovy zo sy fahamendrehana ny olombelona rehetra. Samy manan-tsaina sy fieritreretana ka tokony hifampitondra am- pirahalahiana.", + "metadata": { + "languages": [ + "ind", + "tgl", + "slk" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "e74053233c7584ace3ddb4357ac894b7", + "text": "Malay (Arabic) \u0633\u0645\u0648\u0627 \u0645\u0623\u0646\u0633\u064a \u062f\u0644\u0627\u0647\u064a\u0631\u0643\u0646 \u0628\u064a\u0628\u0633 \u062f\u0627\u0646 \u0633\u0627\u0645\u0631\u0627\u062a \u062f\u0631\u064a \u0633\u06ac\u064a \u0643\u0645\u0648\u0644\u064a\u0623\u0646 \u062f\u0627\u0646 \u062d\u0642\u0662. 
\u0645\u0631\u064a\u0643 \u0645\u0645\u06a4\u0648\u06bd\u0627\u064a \u06a4\u0645\u064a\u0643\u064a\u0631\u0646 \u062f\u0627\u0646 \u06a4\u0631\u0627\u0633\u0623\u0646 \u0647\u0627\u062a\u064a \u062f\u0627\u0646 \u0647\u0646\u062f\u0642\u0644\u0647 \u0628\u0631\u062a\u064a\u0646\u062f\u0642 \u062f \u0627\u0646\u062a\u0627\u0631\u0627 \u0633\u0627\u062a\u0648 \u0633\u0627\u0645 \u0644\u0627\u0626\u0646 \u062f\u06a0\u0646 \u0633\u0645\u0627\u06a0\u062a \u06a4\u0631\u0633\u0627\u0648\u062f\u0627\u0631\u0623\u0646.", + "metadata": { + "languages": [ + "ara", + "fas" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "cec58af843b3ffbe84e80cc0ac35d856", + "text": "Malay (Latin) Semua manusia dilahirkan bebas dan samarata dari segi kemuliaan dan hak-hak. 
Mereka mempunyai pemikiran dan perasaan hati dan hendaklah bertindak di antara satu sama lain dengan semangat persaudaraan.", + "metadata": { + "languages": [ + "ind" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "563cefb3266bb81ad240fb3d631fb5b0", + "text": "Malayalam \u0d2e\u0d28\u0d41\u0d37\u0d4d\u0d2f\u0d30\u0d46\u0d32\u0d4d\u0d32\u0d3e\u0d35\u0d30\u0d41\u0d02 \u0d24\u0d41\u0d32\u0d4d\u0d2f\u0d3e\u0d35\u0d15\u0d3e\u0d36\u0d19\u0d4d\u0d19\u0d33\u0d4b\u0d1f\u0d41\u0d02 \u0d05\u0d28\u0d4d\u0d24\u0d38\u0d4d\u0d38\u0d4b\u0d1f\u0d41\u0d02 \u0d38\u0d4d\u0d35\u0d3e\u0d24\u0d28\u0d4d\u0d24\u0d4d\u0d30\u0d4d\u0d2f\u0d24\u0d4d\u0d24\u0d4b\u0d1f\u0d41\u0d02\u0d15\u0d42\u0d1f\u0d3f \u0d1c\u0d28\u0d3f\u0d1a\u0d4d\u0d1a\u0d3f\u0d1f\u0d4d\u0d1f\u0d41\u0d33\u0d4d\u0d33\u0d35\u0d30\u0d3e\u0d23\u0d4d\u200c. 
\u0d05\u0d28\u0d4d\u0d2f\u0d4b\u0d28\u0d4d\u0d2f\u0d02 \u0d2d\u0d4d\u0d30\u0d3e\u0d24\u0d43\u0d2d\u0d3e\u0d35\u0d24\u0d4d\u0d24\u0d4b\u0d1f\u0d46 \u0d2a\u0d46\u0d30\u0d41\u0d2e\u0d3e\u0d31\u0d41\u0d35\u0d3e\u0d28\u0d3e\u0d23\u0d4d\u200c \u0d2e\u0d28\u0d41\u0d37\u0d4d\u0d2f\u0d28\u0d4d\u0d28\u0d41 \u0d35\u0d3f\u0d35\u0d47\u0d15\u0d2c\u0d41\u0d26\u0d4d\u0d27\u0d3f\u0d2f\u0d41\u0d02 \u0d2e\u0d28\u0d38\u0d4d\u0d38\u0d3e\u0d15\u0d4d\u0d37\u0d3f\u0d2f\u0d41\u0d02 \u0d38\u0d3f\u0d26\u0d4d\u0d27\u0d2e\u0d3e\u0d2f\u0d3f\u0d30\u0d3f\u0d15\u0d4d\u0d15\u0d41\u0d28\u0d4d\u0d28\u0d24\u0d4d\u200c.", + "metadata": { + "languages": [ + "mal" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "a1c5471ea369ac3ba44f2829262f62aa", + "text": "Malayalam \u0d2e\u0d28\u0d41\u0d37\u0d4d\u0d2f\u0d30\u0d46\u0d32\u0d4d\u0d32\u0d3e\u0d35\u0d30\u0d41\u0d02 \u0d24\u0d41\u0d32\u0d4d\u0d2f\u0d3e\u0d35\u0d15\u0d3e\u0d36\u0d19\u0d4d\u0d19\u0d33\u0d4b\u0d1f\u0d41\u0d02 \u0d05\u0d28\u0d4d\u0d24\u0d38\u0d4d\u0d38\u0d4b\u0d1f\u0d41\u0d02 \u0d38\u0d4d\u0d35\u0d3e\u0d24\u0d28\u0d4d\u0d24\u0d4d\u0d30\u0d4d\u0d2f\u0d24\u0d4d\u0d24\u0d4b\u0d1f\u0d41\u0d02\u0d15\u0d42\u0d1f\u0d3f \u0d1c\u0d28\u0d3f\u0d1a\u0d4d\u0d1a\u0d3f\u0d1f\u0d4d\u0d1f\u0d41\u0d33\u0d4d\u0d33\u0d35\u0d30\u0d3e\u0d23\u0d4d\u200c. 
\u0d05\u0d28\u0d4d\u0d2f\u0d4b\u0d28\u0d4d\u0d2f\u0d02 \u0d2d\u0d4d\u0d30\u0d3e\u0d24\u0d43\u0d2d\u0d3e\u0d35\u0d24\u0d4d\u0d24\u0d4b\u0d1f\u0d46 \u0d2a\u0d46\u0d30\u0d41\u0d2e\u0d3e\u0d31\u0d41\u0d35\u0d3e\u0d28\u0d3e\u0d23\u0d4d\u200c \u0d2e\u0d28\u0d41\u0d37\u0d4d\u0d2f\u0d28\u0d4d\u0d28\u0d41 \u0d35\u0d3f\u0d35\u0d47\u0d15\u0d2c\u0d41\u0d26\u0d4d\u0d27\u0d3f\u0d2f\u0d41\u0d02 \u0d2e\u0d28\u0d38\u0d4d\u0d38\u0d3e\u0d15\u0d4d\u0d37\u0d3f\u0d2f\u0d41\u0d02 \u0d38\u0d3f\u0d26\u0d4d\u0d27\u0d2e\u0d3e\u0d2f\u0d3f\u0d30\u0d3f\u0d15\u0d4d\u0d15\u0d41\u0d28\u0d4d\u0d28\u0d24\u0d4d\u200c.", + "metadata": { + "languages": [ + "mal" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "abe9340337f1806d7c7bb1e55e23819f", + "text": "Maldivian \u0780\u07aa\u0783\u07a8\u0780\u07a7 \u0787\u07a8\u0782\u07b0\u0790\u07a7\u0782\u07aa\u0782\u07b0\u0788\u07ac\u0790\u07b0 \u078b\u07aa\u0782\u07a8\u0794\u07ac\u0787\u07a6\u0781\u07b0 \u0787\u07aa\u078a\u07a6\u0782\u07b0\u0788\u07a6\u0782\u07a9\u060c \u0789\u07a8\u0782\u07a8\u0788\u07a6\u0782\u07b0\u0786\u07a6\u0789\u07aa\u078e\u07a6\u0787\u07a8\u060c \u0780\u07a6\u0789\u07a6\u0780\u07a6\u0789\u07a6 \u0799\u07a6\u0787\u07b0\u07a4\u07aa\u078c\u07a6\u0786\u07a6\u0786\u07a7\u0787\u07ac\u0786\u07aa\u060c \u0780\u07a6\u0789\u07a6\u0780\u07a6\u0789\u07a6 \u078b\u07a6\u0783\u07a6\u0796\u07a6\u0787\u07ac\u0787\u07b0\u078e\u07a6\u0787\u07a8 \u0786\u07a6\u0789\u07ad\u0780\u07a8\u078c\u07ac\u0788\u07a8\u078e\u07ac\u0782\u07b0\u0788\u07a7 \u0784\u07a6\u0787\u07ac\u0787\u07b0\u078e\u07ac \u078e\u07ae\u078c\u07aa\u078e\u07a6\u0787\u07ac\u0788\u07ac. 
\u0780\u07ac\u0794\u07ae \u0788\u07a8\u0790\u07b0\u0782\u07aa\u0789\u07a7\u0787\u07a8\u060c \u0780\u07ac\u0794\u07ae\u0784\u07aa\u0787\u07b0\u078b\u07a9\u078e\u07ac \u0784\u07a7\u0783\u07aa \u0787\u07ac\u0789\u07a9\u0780\u07aa\u0782\u07b0\u0782\u07a6\u0781\u07b0 \u078d\u07a8\u0784\u07a8\u078e\u07ac\u0782\u07b0\u0788\u07ac\u0787\u07ac\u0788\u07ac. \u0787\u07a6\u078b\u07a8 \u0787\u07ac\u0786\u07a6\u0786\u07aa \u0787\u07a6\u0782\u07ac\u0786\u07a6\u0786\u07a7\u0789\u07ac\u078b\u07aa \u0787\u07ac\u0789\u07a9\u0780\u07aa\u0782\u07b0 \u0789\u07aa\u07a2\u07a7\u0789\u07a6\u078d\u07a7\u078c\u07b0 \u0786\u07aa\u0783\u07a6\u0782\u07b0\u0788\u07a7\u0782\u07a9\u060c \u0787\u07aa\u079a\u07aa\u0787\u07b0\u0788\u07a6\u078c\u07b0\u078c\u07ac\u0783\u07a8\u0786\u07a6\u0789\u07aa\u078e\u07ac \u0783\u07ab\u0799\u07ac\u0787\u07b0\u078e\u07a6\u0787\u07ac\u0788\u07ac.", + "metadata": { + "languages": [ + "ara" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "c3f212c4f2a219b94139b577bd336587", + "text": "Maltese Il-bnedmin kollha jitwieldu \u0127ielsa u ugwali fid-dinjit\u00e0 u d-drittijiet. 
Huma mog\u0127nija bir-ra\u0121uni u bil-kuxjenza u g\u0127andhom i\u0121ibu ru\u0127hom ma\u2019 xulxin bi spirtu ta\u2019 a\u0127wa.", + "metadata": { + "languages": [ + "hrv", + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "d19e3ea923ac9598f7ebe493963dcb57", + "text": "Mam, Northern Kyaqiilqe winaq nchi itz'aj tuj kopib'il, juunx kychuwiinqal b'ix kyokleen, kyja'tzan tuj tb'aanal xiinv'il tu'n kyanq'iin tuj b'ank'u'j kyxool.", + "metadata": { + "languages": [ + "som" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "53014d120e3ef288a2152a64e8cc5fae", + "text": "Maninkakan, Eastern Adamadennu b\u025b\u025b s\u0254d\u0254n\u0272a kakan, h\u0254r\u0254ya d\u0254, fabaden\u0272a d\u0254 ani sariya ta fan d\u0254. Hankili ni s\u0254n\u0254m\u025b ye alu b\u025b\u025b ma, a kakan wo d\u0254 alu ye bakelen\u0272a sila lataaman alu \u0272\u0254\u0254n t\u025b.", + "metadata": { + "languages": [ + "ind", + "tur", + "som" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "61226d5c10b4c0396f2f95f2ca652df3", + "text": "Manx Ta dy chooilley ghooinney ruggit seyr as corrym rish dy chooilley ghooinney elley ayns ooashley as ayns cairys. 
Ta resoon as cooinsheanse stowit orroo as lhisagh ad dellal rish y cheilley lesh spyrryd braaragh.", + "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "a937f2e976892410723177039216ec66", + "text": "Maori Ko te katoa o nga tangata i te whanaungatanga mai e watea ana i nga here katoa; e tauriterite ana hoki nga mana me nga tika. E whakawhiwhia ana hoki ki a ratou te ngakau whai whakaaro me te hinengaro mohio ki te tika me te he, a e tika ana kia meinga te mahi a tetahi ki tetahi me ma roto atu i te wairua o te noho tahi, ano he teina he tuakana i ringa i te whakaaro kotahi.", + "metadata": { + "languages": [ + "swa", + "tgl" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "UncategorizedText", + "element_id": "dae3f973f6bbdd3401ce4aa3e297b361", + "text": "Mapudungun Kom pu mogence kisuzuam mvlekey, kom cegeygvn, logkogeygvn ka piwkegeygvn, nieygvn kimvn fey mew mvley ta\u00f1i yamniewael ka epu\u00f1pvle kejuwael egvn.", + "metadata": { + "languages": [ + "ind", + "dan", + "afr" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "ecca335c6a309f063e4df0ad38eecd27", + "text": "Marathi \u0938\u0930\u094d\u0935 \u092e\u093e\u0928\u0935\u0940 \u0935\u094d\u092f\u0915\u094d\u0924\u093f 
\u091c\u0928\u094d\u092e\u0924\u0903\u091a \u0938\u094d\u0935\u0924\u0902\u0924\u094d\u0930 \u0906\u0939\u0947\u0924 \u0935 \u0924\u094d\u092f\u093e\u0902\u0928\u093e \u0938\u092e\u093e\u0928 \u092a\u094d\u0930\u0924\u093f\u0937\u094d\u0920\u093e \u0935 \u0938\u092e\u093e\u0928 \u0905\u0927\u093f\u0915\u093e\u0930 \u0906\u0939\u0947\u0924. \u0924\u094d\u092f\u093e\u0902\u0928\u093e \u0935\u093f\u091a\u093e\u0930\u0936\u0915\u094d\u0924\u093f \u0935 \u0938\u0926\u0938\u0935\u093f\u0926\u094d\u0935\u0947\u0915\u092c\u0941\u0926\u094d\u0927\u093f \u0932\u093e\u092d\u0932\u0947\u0932\u0940 \u0906\u0939\u0947. \u0935 \u0924\u094d\u092f\u093e\u0902\u0928\u0940 \u090f\u0915\u092e\u0947\u0915\u093e\u0902\u0936\u0940 \u092c\u0902\u0927\u0941\u0924\u094d\u092f\u093e\u091a\u094d\u092f\u093e \u092d\u093e\u0935\u0928\u0947\u0928\u0947 \u0906\u091a\u0930\u0923 \u0915\u0930\u093e\u0935\u0947.", + "metadata": { + "languages": [ + "mar" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "01fca41684c1b7b968a512dfeec0139e", + "text": "Marshallese Armij otemjej rej rujlok ilo anemkwoj im jonon utiej eo im maron ko air wot juon. 
Emwij lelok non ir maron in bukot non ir make im bareinwot boklikot kin men ko rej tomaki im bwe jerbal non dron ilo juon jitobon jimpenjatin.", + "metadata": { + "languages": [ + "slv" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "3a69fb7fe5d36459edf30ffa8f0fb0bc", + "text": "Mats\u00e9s Chidon tishaido yec matses abitedimbo b\u00ebdamboec isnanac b\u00ebdambo ictsiash. Chieshnanac icsambo ictsiash. Abitedimbo b\u00ebdamboec tabadac b\u00ebdambo ictsiash. Shubu abents\u00ebcquid\u00ebn tabadac birnboec abitedi tabadac b\u00ebdambo ictsiash - quequin chuipan\u00ebdash nidaid abitedino\u00ebsh cho-choquidon.", + "metadata": { + "languages": [ + "eng", + "cat" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "9c3467ac29002d9da69f15b063e13924", + "text": "Maya, Yucat\u00e1n Tul\u00e1akal w\u00edinik ku s\u00edijil j\u00e1alk\u02bcab yetel keet u tsiikul yetel Najmal Sijnalil, beytun xan na\u02bcata\u02bcan sijnalil yetel no\u02bcoja\u02bcanil u tuukulo\u02bc, k\u02bca\u02bcabet u bisikuba bey l\u00e1aktzilil yetel tul\u00e1akal u baatzile\u02bc.", + "metadata": { + "languages": [ + "hun", + "ind", + "tur", + "som" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "UncategorizedText", + "element_id": "7947c1a7d2c92cd1fea5311d4d9241ba", + "text": "Mazahua 
Central Texe yo nte\u0331'e\u0331 chjetrjoji, angezeji ximi xo'oji \u00f1eje k'inchiji, nesta ra ngara na jo'o k'o dyaja e nte\u0331'e\u0331.", + "metadata": { + "languages": [ + "hrv", + "slv", + "sqi" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "ded8e8298bf9edcaae477d35c01be283", + "text": "Mazatec, Ixcatl\u00e1n Nga ndindie xuta ngatsen de\u2019e ko ngondsejen ngatjin-kjua nga xchandinkon nt\u2019a ngondsejen ngatjin kokjin-tokon,kotjinkjua nga takie engajan skuendinkon xkjin.", + "metadata": { + "languages": [ + "sqi", + "slv" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "150dfe46097e25b8aa601565b3487049", + "text": "Mbundu O athu woso avwala abhuluka ni kusokela mu kijingu ni mu itekelu. Ene ala ni ulungilu ni kilunji ni atokala kulaya kumoxi nya akwa mu mixima ya undandu.", + "metadata": { + "languages": [ + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "407b0080d05f944ba83f5c3e722bde13", + "text": "Mbundu (009) Mutu uoso uoso a mu vuala ni ufolo ni kutena kumoxi mu kijingu ni mu ubinganu. 
Mu kilembu kia kubanga ni mu ubanzelu, Atena u\u00ea kubanga ioso kua akua mu muxima ua tululuka mba upange.", + "metadata": { + "languages": [ + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "d76da3518499aeb0e43b4c133556d135", + "text": "Mende Numuvuisia Kp\u025bl\u025b\u025b ta ti le t\u025b y\u025b nduw\u0254 ya hu, tao ti nuvuu yei k\u025b\u025b ti l\u0254nyi maa h\u025bwung\u0254. Kiiya k\u025b\u025b hindaluahu g\u0254\u0254la a y\u025bl\u0254 ti hun. Fale mahoung\u0254 ti ti ny\u0254ny\u0254hu hoi kia ndeegaa.", + "metadata": { + "languages": [ + "swa", + "ind" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "ac3c7d9dea662f8ba1dfb383045ce903", + "text": "Micmac Msit mimajulnu\u2019k weskwijinu\u2019ltijik alsumsultijik aqq newte\u2019 tett wkpimte\u2019tmut aqq koqwajo\u2019taqnn wejkul\u2019aqmititl.", + "metadata": { + "languages": [ + "est", + "sqi", + "hrv" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "3bc1008b1e95383cab780d483a216d43", + "text": "Minangkabau Sadonyo manusia dilahiakan mardeka dan punyo martabat sarato hak-hak nan samo. 
Mareka dikaruniai aka jo hati nurani, supayo satu samo lain bagaul sarupo urang badunsanak.", + "metadata": { + "languages": [ + "ind", + "tgl" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "208949d3fb140dd9413f78a99feda832", + "text": "M\u00edskito Upla sut ba kulkanka lakara, airaitka nanira bara pri, sin, aikuki, baku takisa. Bamna sins laka bri baku, lukanka bain pri baku aimuihni lakara, pana pana tabaikan kaiasa.", + "metadata": { + "languages": [ + "ind" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "db840a4da82f82310ee839cd22112f22", + "text": "Mixe, Totontepec Tum akijpxa xa ve\u2019e jayu kye\u2019ex, ve\u2019em ax j\u00f6\u2019n tyukidaakj\u00fcva tijaty m\u00ebkin; ve\u2019empa axj\u00f6\u2019n j\u00e4 jy\u00f6\u00f6jtykin di yaknaxy, jats oy myujaty\u00f6\u00f6\u2019t\u00ebjk di m\u00eb\u00ebt nayjavaj\u00fct.", + "metadata": { + "languages": [ + "fin" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "bbe9fa33187b976f4032c34c6ca2fabf", + "text": "Mixtec, Metlat\u00f3noc Taka ma \u00f1ayi nguiakoi \u00f1ayivi \u00f1atu na ja'a tnu'u ja kusa'a ndeva'\u00f1a-i, su'uva kajito va'a\u00f1a-i, yuka ku ja jini\u00f1u'u ja kukototna-i.", + "metadata": { + "languages": [ + "hrv", + "swa" + ], + "filetype": 
"text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "03b6cefe8d16c5c896f974b268a52302", + "text": "Mizo Mi zawng zawng hi zal\u00eana piang kan ni a, zahawmna leh dikna chanvoah intluk tl\u00e2ng vek kan ni. Chhia leh tha hriatna f\u00eem neia siam kan nih avangin kan mihring puite chungah inunauna thinlung kan pu tlat tur a ni.", + "metadata": { + "languages": [ + "ind", + "tgl", + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "ec6cdd4d644ddfaafbb05d9216ebbd7c", + "text": "Moba Nifoi kul maal yendu buam po i, k b yudand yen b yiko-nba bi\u025b ja. 
B m\u0254g maalm g ban yal g \u014ban, g bi\u025b baa bu yen lieb naataann n nin\u014b i.", + "metadata": { + "languages": [ + "ind", + "som", + "tgl" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "UncategorizedText", + "element_id": "0d21e19f00c8cb7264e83c01c0f02161", + "text": "Mon \u1019\u105e\u102d\u101f\u103a\u1002\u1019\u1060\u102d\u102f\u105a\u103a \u1021\u102d\u102f\u103f\u102e\u102f\u1010\u1021\u103a\u101d\u103d\u1036 \u1005\u1014\u1030\u101e\u1060\u1038\u1010\u102d\u1010\u103a \u1014\u1030\u1002\u101d\u103a\u1002\u105e\u1034 \u1012\u103e\u103a\u1019\u105e\u102d\u101f\u103a\u101e\u1060\u1038\u1015\u103d\u1038\u1021\u102d\u102f\u1010\u103a\u1010\u102f\u1032 \u1021\u1001\u1031\u102b\u105a\u103a\u1021\u101b\u102c \u1000\u1031\u102f\u102c\u1036 \u101e\u102d\u1000\u1039\u1001\u102c\u1019\u105e\u102d\u101f\u103a\u1010\u1021\u103a \u1010\u102f\u1015\u103a \u101e\u105f\u101f\u103a\u101b\u104b \u1019\u105e\u102d\u101f\u103a\u1010\u1021\u103a\u1002\u103e\u103a \u1014\u103d\u1036\u1000\u1035\u102f\u1013\u101b\u103a\u1005\u105a\u103a\u1001\u103c\u105a\u103a\u1000\u1031\u102f\u102c\u1036 \u101e\u1019\u1039\u1010\u102e\u100a\u102c\u100f\u103a \u1013\u101d\u103a\u1015\u102b\u103a\u1015\u1032\u102b \u1001\u102d\u102f\u101f\u103a\u1015\u101b\u1031\u1036\u1014\u103d\u1036\u1010\u102f\u1032 \u100a\u1038\u1019\u103d\u1032 \u1000\u1031\u102f\u102c\u1036 \u100a\u1038\u1019\u103d\u1032 \u1011\u1031\u1000\u103a\u1000\u1035\u102f \u101e\u1039\u1012\u1038\u1012\u1039\u1002\u1031\u1010\u103a\u1017\u1000\u103a \u1006\u1000\u103a\u1006\u1031\u102c\u1036\u100a\u1038\u101e\u1039\u1000\u1021\u103a \u1014\u1005\u102d\u102f\u1010\u103a\u1013\u102c\u1010\u103a\u1000\u1031\u102c\u1036\u1012\u1031\u1036\u1021\u101b\u1031\u104b", + "metadata": { + "filetype": "text/plain", + 
"data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "a36553665277971db5d4c68908f99088", + "text": "Mongolian, Halh (Cyrillic) \u0425\u04af\u043d \u0431\u04af\u0440 \u0442\u04e9\u0440\u0436 \u043c\u044d\u043d\u0434\u043b\u044d\u0445\u044d\u0434 \u044d\u0440\u0445 \u0447\u04e9\u043b\u04e9\u04e9\u0442\u044d\u0439, \u0430\u0434\u0438\u043b\u0445\u0430\u043d \u043d\u044d\u0440 \u0442\u04e9\u0440\u0442\u044d\u0439, \u0438\u0436\u0438\u043b \u044d\u0440\u0445\u0442\u044d\u0439 \u0431\u0430\u0439\u0434\u0430\u0433. \u041e\u044e\u0443\u043d \u0443\u0445\u0430\u0430\u043d, \u043d\u0430\u043d\u0434\u0438\u043d \u0447\u0430\u043d\u0430\u0440 \u0437\u0430\u044f\u0430\u0441\u0430\u043d \u0445\u04af\u043d \u0433\u044d\u0433\u0447 \u04e9\u04e9\u0440 \u0445\u043e\u043e\u0440\u043e\u043d\u0434\u043e\u043e \u0430\u0445\u0430\u043d \u0434\u04af\u04af\u0433\u0438\u0439\u043d \u04af\u0437\u044d\u043b \u0441\u0430\u043d\u0430\u0430\u0433\u0430\u0430\u0440 \u0445\u0430\u0440\u044c\u0446\u0430\u0445 \u0443\u0447\u0438\u0440\u0442\u0430\u0439.", + "metadata": { + "languages": [ + "rus" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "d68747fffbd22857ff75b3bfe7dc00c4", + "text": "Mongolian, Halh (Mongolian)", + "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "UncategorizedText", + 
"element_id": "ffd087e56c47b9405e77d2f08dca7d1e", + "text": "\u182c\u1826\u182e\u1826\u1828 \u182a\u1826\u1837 \u1832\u1825\u1837\u1825\u1835\u1826 \u182e\u1821\u1828\u1833\u1821\u182f\u1821\u182c\u1826 \u1821\u1837\u182c\u1821 \u1834\u1822\u182f\u1825\u182d\u1821\u202f\u1832\u1821\u1822\u1802 \u1820\u1833\u1820\u182f\u1822\u182c\u1820\u1828 \u1828\u1821\u1837\u180e\u1821 \u1832\u1825\u1837\u1825\u202f\u1832\u1821\u1822\u1802 \u1822\u1835\u1822\u182f \u1821\u1837\u182c\u1821\u202f\u1832\u1821\u1822 \u182a\u1820\u1822\u1820\u182d\u1803 \u1823\u1836\u1824\u1828 \u1824\u182c\u1820\u182d\u1820\u1828\u1802 \u1828\u1820\u1828\u1833\u1822\u1828 \u1834\u1822\u1828\u1820\u1837 \u1835\u1820\u1836\u1820\u182d\u1820\u1830\u1820\u1828 \u182c\u1826\u182e\u1826\u1828 \u182c\u1821\u182d\u1834\u1822 \u1825\u182d\u1821\u1837\u180e\u1821 \u182c\u1823\u182d\u1823\u1837\u1823\u1828\u1833\u1823\u180e\u1828 \u1820\u182c\u1820\u1828 \u1833\u1821\u182d\u1826\u1826\u202f\u1822\u1828 \u1826\u1835\u1822\u182f \u1830\u1820\u1828\u1820\u182d\u1820\u202f\u1825\u1820\u1837 \u182c\u1820\u1837\u1822\u1834\u1820\u182c\u1825 \u1824\u1834\u1822\u1837\u202f\u1832\u1820\u1822\u1803", + "metadata": { + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "3d0a59b543e077c2f0c391add9b38a89", + "text": "Montenegrin Sva ljudska bi\u0107a ra\u0111aju se slobodna i jednaka u dostojanstvu i pravima. 
Ona su obdarena razumom i savje\u0161\u0107u i jedni prema drugima treba da postupaju u duhu bratstva.", + "metadata": { + "languages": [ + "hrv" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "86eff2400c116e5d00b9f1b3e17e0d7f", + "text": "M\u00f2or\u00e9 Ninsaalb\u00e3 f\u00e3a s\u00e3 n doge, ned f\u00e3a so a menga, ned pa rogd n yaa yamb ye, neb\u00e3 f\u00e3a zema taab b yel-segd\u0269 la b burk\u0129ndlem w\u025b\u025bnge\u0303. Neb\u00e3 f\u00e3a tara yam la tagsgo, ned f\u00e3a togame n v\u0269\u0269nd ne a to saam-biir p\u028age\u0303.", + "metadata": { + "languages": [ + "som", + "ind", + "cym" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "91eb2842523b8e930ee6199a0098fa14", + "text": "Moro Le\u0111a pre\u0111 lal\u01dd\u014b\u01ddnia l\u00ebb\u01ddr\u00ebinialo na l\u01dd\u027d\u01ddwa\u1e6fo e\u014ben \u014b\u01dd\u0111amia na e\u014ben pre\u0111 i\u014bi \u014b\u01ddrca\u0111a\u1e6fo \u1e6fa le\u0111a al\u01ddfi\u0111i. 
L\u00ebn\u014bulu pre\u0111 lanan\u00ebinu \u0111\u01ddnaca \u0111ame \u027det\u01dd\u027deto na ara g\u01dd\u014b\u01ddra \u014ben\u014ban\u1e6fa al\u01dd\u027d\u01ddwa\u0111a\u1e6fe alam\u01dd\u0111ai\u0111e b\u01dd\u027dan usilaga g\u01dd\u014b\u01ddl\u01dd\u014b\u01ddnia na g\u01dd\u014borba.", + "metadata": { + "languages": [ + "hrv" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "0e458a0b7d5fb50416d274c11e747017", + "text": "Mozarabic Totos les esseres humanos nascent libberos et eguales in dignitate e dretos e, dotatos commo stant de racione e conscientia, devent comportarse in germanitate les unos con les altros.", + "metadata": { + "languages": [ + "spa", + "ita" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "25ab4cdce4c3199b55a4bd49864e981b", + "text": "Naga, Ao Meimchir ajak temeten aser tashi kasa n\u00fcji nung asor. 
Parnok dak bilemtetts\u00fc shisats\u00fc aser tangatetba kasa ag\u00fcja aliba jagi k\u00fclem adianu rongnung tanela ka nung lungjema alits\u00fcla.", + "metadata": { + "languages": [ + "ind", + "est" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "ae7016d3a16e6fef244158363a24ac9e", + "text": "Nahuatl, Central Nochi tlakamej uan siuamej kipiaj manoj kuali tlakatisej, nochi san se totlatechpouiltilis uan titlatepanitalojkej, yeka moneki kuali ma timouikakaj, ma timoiknelikaj, ma timotlasojtlakaj uan ma timotlepanitakaj.", + "metadata": { + "languages": [ + "hrv", + "ind" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "9376ea8b7100165bb8bd466c00f5bdcc", + "text": "Nanai \u0425\u044d\u043c\u0442\u0443 \u043d\u0430\u0438\u0306\u0441\u0430\u043b \u0433\u0438\u043f\u0430\u043b\u0438\u043d, \u043c\u044d\u043d\u044d \u0433\u044d\u0431\u0443\u0434\u0438\u044d\u0440\u0438, \u043f\u0440\u0430\u0432\u043e\u0441\u0430\u043b\u0434\u0438\u0430\u0440\u0438 \u044d\u043c\u0443\u0442\u0443 \u0431\u0430\u043b\u0434\u0438\u0447\u0438. 
\u041d\u0435\u0308\u0430\u043d\u0447\u0438 \u043c\u0443\u0440\u0443\u04c8\u043a\u0443, \u0434\u044d\u0440\u044d\u043b\u043a\u0443, \u0434\u0438\u0430 \u0434\u0438\u0430\u0432\u0430\u0440\u0438 \u0430-\u043d\u044d\u0443-\u043c\u044d\u0442 \u0431\u043e\u0434\u043e\u043c\u0430\u0440\u0438 \u0442\u0430\u0433\u0438\u043b\u0430\u0438\u0306\u0447\u0438.", + "metadata": { + "languages": [ + "rus" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "201308d749f47555d03c5087f304457b", + "text": "Navajo Bila\u02bcashda\u02bcii t\u02bc\u00e1\u00e1 a\u0142tsoh yin\u00edk\u02bcehgo bidizhch\u012fh d\u00f3\u00f3 ahee\u0142t\u02bceego \u00edl\u012f\u0301\u012f\u0301go bee baah\u00f3ch\u012f\u02bc. E\u00ed\u00ed h\u00e1n\u00ed\u02bc d\u00f3\u00f3 h\u00e1n\u00edtshakees hwiihdaasya\u02bc e\u00ed\u00ed binahj\u012f\u0301\u02bc ahidin\u00ed\u0142n\u00e1hgo \u00e1l\u00edleek\u02bcehgo k\u02bc\u00e9 bee ahi\u0142 niidl\u012f\u0301.", + "metadata": { + "languages": [ + "som", + "ces" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "7c1696d6b3e99d3ac8481060f74fe8d7", + "text": "Ndebele Abantu bonke bazalwa bekhululekile njalo belingana kumalungelo abo. 
Balesipho sikanembeza, ngakho bamele baphathane ngomoya otshengisa ubuhlobo lobunye.", + "metadata": { + "languages": [ + "tgl", + "ind" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "d63b3107bf325e2c695213bc9dd8742e", + "text": "Ndonga Aantu ayehe oya valwa ye na emanguluko noye na ondilo yi thike pamwe osho wo uuthemba. Oye na omaipulo goondunge neiuvo onkene naa kalathane mombepo yuumwainathana.", + "metadata": { + "languages": [ + "swa", + "tgl" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "a0cad811bb49185b6fdb66fb2060c59a", + "text": "Nenets \u0415\u0442 \u0445\u0438\u0431\u044f\u0440\u0438 \u043d\u0435\u043d\u044d\u0446\u044c \u0441\u043e\u044f\u043c\u0430\u0440\u0438\u0430\u043d\u0442\u0430 \u0445\u0443\u0440\u043a\u0430\u0440\u0438 \u043f\u0440\u0430\u0432\u0430\u0434\u0430 \u0442\u043d\u044f\u0432\u0430, \u04c8\u043e\u0431\u043e\u0439 \u043d\u0435\u043d\u044d\u0446\u044f \u043d\u0438\u0434\u0443 \u043d\u0438\u0441\u044c \u0442\u043e\u043a\u0430\u043b\u0431\u0430, \u04c8\u044b\u0431\u0442\u0430\u043c\u0431\u0430 \u0438\u043b\u0435\u0432\u0430\u0442\u0443 \u0442\u0430\u0440\u0430.", + "metadata": { + "languages": [ + "rus", + "bul" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "UncategorizedText", + "element_id": 
"80851f8727cbd5baeb6611ada10ff1f9", + "text": "Nepali \u0938\u092c\u0948 \u0935\u094d\u092f\u0915\u094d\u0924\u093f \u0939\u0930\u0942 \u091c\u0928\u094d\u092e\u091c\u093e\u0924 \u0938\u094d\u0935\u0924\u0928\u094d\u0924\u094d\u0930 \u0939\u0941\u0928 \u0924\u0940 \u0938\u092c\u0948\u0915\u094b \u0938\u092e\u093e\u0928 \u0905\u0927\u093f\u0915\u093e\u0930 \u0930 \u092e\u0939\u0924\u094d\u0935 \u091b\u0964 \u0928\u093f\u091c\u0939\u0930\u0942\u092e\u093e \u0935\u093f\u091a\u093e\u0930 \u0936\u0915\u094d\u0924\u093f \u0930 \u0938\u0926\u094d\u0927\u093f\u091a\u093e\u0930 \u092d\u090f\u0915\u094b\u0932\u0947 \u0928\u093f\u091c\u0939\u0930\u0942\u0932\u0947 \u0906\u092a\u0938\u092e\u093e \u092d\u093e\u0924\u0943\u0924\u094d\u0935\u0915\u094b \u092d\u093e\u0935\u0928\u093e \u092c\u093e\u091f \u0935\u094d\u092f\u0935\u0939\u093e\u0930 \u0917\u0930\u094d\u0928\u0941 \u092a\u0930\u094d\u091b\u0964", + "metadata": { + "languages": [ + "nep" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "23ce504c8239c6964f02399ff1fcb1bf", + "text": "Nganasan \u0411\u04d9\u043d\u0434\u0435\u201d \u04c8\u0430\u043d\u0430\u0441\u0430\u043d\u04d9\u201d \u04c8\u04d9\u0442\u0443\u043a\u04d9\u043d\u0434\u044b\u201d \u043d\u0435\u043d\u0434\u044f\u201d\u0442\u0443\u043e\u201d \u04c8\u043e\u043d\u04d9 \u0445\u043e\u043d\u0441\u044b \u0445\u0435\u043b\u0438\u0434\u0435\u201d \u04c8\u0438\u043b\u0435 \u043c\u04d9\u043d\u04d9\u0439 (\u043f\u0440\u0430\u0432\u0430\u0439). 
\u0421\u044b\u0442\u044b\u04c8 \u0445\u043e\u043d\u0434\u044b\u201d \u04c8\u0438\u043b\u0435 \u04c8\u043e\u043d\u0434\u0430 \u04c8\u043e\u043d\u04d9 \u0441\u044f\u0440\u0443, \u0434\u04af\u0437\u044b\u0442\u04d9\u043d\u0434\u044b\u04c8 \u0438\u0445\u04af\u0442\u04af\u04c8 \u043d\u044f\u0433\u04d9\u04d9\u201d \u0441\u04af\u04e9\u0430\u0440\u0443\u0441\u04d9\u201d.", + "metadata": { + "languages": [ + "rus" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "14d1c1d94f755feee4c5765fa51fb448", + "text": "Niue Ko e tau tagata momoui oti kua fanau ai ke he fakatokanoaaga mo e fakatatai oti e tau tutuaga mo e tau tonuhia. Kua moua ai foki e lautolu e kakano mo e manamanatuaga ti kua lata ni ke fakafetui e taha ke he taha ke he agaga fakamatakainaga.", + "metadata": { + "languages": [ + "swa", + "ind" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "9164d07351a9366edfae5357e2ab807c", + "text": "Nomatsiguenga Antagaisati matsiguenga ibogaigu\u00eb matsiguengasonorl. Aisati icantaigaca. Teni iromerataiguengani. Antagaisati iquengaigui aisati ig\u00f3iguiro ora caninaro aisati ig\u00f3iguiro ora te onganinate. 
Iroro caninataque omagaro matsiguenga iraniacaninataigueri ira basiniati matsiguenga aisati ingantaiguer\u00ed ora caninaro.", + "metadata": { + "languages": [ + "tgl", + "ind" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "a2d52f93737464a25abcd5d12c771b98", + "text": "Norwegian, Bokm\u00e5l Alle mennesker er f\u00f8dt frie og med samme menneskeverd og menneskerettigheter. De er utstyrt med fornuft og samvittighet og b\u00f8r handle mot hverandre i brorskapets \u00e5nd.", + "metadata": { + "languages": [ + "nor" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "0de9dab37169c4ded9b7f75bedf80c7f", + "text": "Norwegian, Nynorsk Alle menneske er f\u00f8dde til fridom og med same menneskeverd og menneskerettar. 
Dei har f\u00e5tt fornuft og samvit og skal leve med kvarandre som br\u00f8r.", + "metadata": { + "languages": [ + "nor" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "00ebc1efcc4358c32327bc6327f0a581", + "text": "Nuosu", + "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "dcfcf466590e9daa75e86df759c90a23", + "text": "\ua2bf\ua0b7\ua0c5\ua13f\ua428\ua425\uff0c\ua305\ua14d\ua002\ua3fd\ua42f\ua488\ua0c5\ua425\ua310\u3002\ua2bf\ua287\ua26a\ua346\ua30b\ua180\ua068\ua24c\ua44c\ua425\uff0c\ua137\ua00b\ua068\ua09b\ua2a8\ua16b\ua0c0\ua0c5\ua425\ua121\ua45f\u3002", + "metadata": { + "languages": [ + "zho" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "68861af146d56db218a932271da013ea", + "text": "Nyamwezi Banhu bose bubyalagwa biyagalulile, n\u2019ikujo haki zilenganelile.", + "metadata": { + "languages": [ + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "2b70b2e37cc28ecb50a65c1977764c27", + "text": "Nyanja (Chechewa) Anthu onse amabadwa aufulu 
ndiponso ofanana mu ulemu ndi ufulu wao. Iwowa ndi wodalitsidwa ndi mphamvu zoganiza ndi chikumbumtima ndipo achitirane wina ndi mnzake mwaubale.", + "metadata": { + "languages": [ + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "fd152d98beaa7e2f825aac9b4d031412", + "text": "Nyanja (Chinyanja) Anthu onse amabadwa mwa ufulu ndiponso olinganga m' makhalidwe ao. Iwo amakhala ndi nzeru za cibadwidwe kotero ayenera kucitirana zabwino wina ndi mnzace.", + "metadata": { + "languages": [ + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "UncategorizedText", + "element_id": "a47d589be50e40faa0306403da28d30d", + "text": "Nyankore Abantu nibazaarwa baine obugabe nobushoborozi ebiri kwingana nibahangwa baine obwengye kandi barikubasa kwahura ekirungi nekibi, nahabwekyo abantu bashemereire kutuura kumwe nkabanya Uganda.", + "metadata": { + "languages": [ + "swa", + "ind" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "ba8f5d6e4dd82ab64f5d456db0bb5fe6", + "text": "Nyemba Vanu voxe vakasemuka mu cizango co mumo lika mu vulemu co kulimanena. 
Vakevo vakala na mangana co na mbunge co vana pande kulinga vamo na vakwavo na mbunge ya vuna yina.", + "metadata": { + "languages": [ + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "8bb5a449ca76c9652411df83a16d36a5", + "text": "Nzema Menli muala di b\u025b ti anwo na eza noko b\u025bs\u025b w\u0254 dibil\u025b nee adenlenyianl\u025b nu. B\u025bl\u025b ndwenlenwo nee adwenle, yem\u0254ti \u0254w\u0254 k\u025b b\u025bkile adiemay\u025bl\u025b b\u025bmaa b\u025b nwo ngoko.", + "metadata": { + "languages": [ + "tur", + "swa", + "afr", + "som" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "945f5e12a8c939707776f2152604ea76", + "text": "Occitan T\u00f3uti lis uman naisson libre. Soun egau p\u00e8rla digneta e li dre. An t\u00f3uti uno resoun e uno counsci\u00e8nci. Se d\u00e8von tenifreirenau lis un 'm\u00e9 lis autre.", + "metadata": { + "languages": [ + "fra", + "ita" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "de85ed5a407a19c2c1c89211693d8861", + "text": "Occitan (Auvergnat) Ta la proussouna neisson lieura mo\u00e9 parira p\u00e0 d\u00efness\u00e0 mai dret. 
Son charjada de razou mo\u00e9 de cousiens\u00e0 mai lhu fau arj\u00ee entreme\u00ee lha bei n'eime de freiress\u00e0.", + "metadata": { + "languages": [ + "fra" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "6260219bc4a42037e7d6f0418b7284c5", + "text": "Occitan (Francoproven\u00e7al, Fribourg) Tot\u00e8 l\u00e8 dzin vinyon ou mondo libro \u00e8 par\u00ea in dinyit\u00e2 \u00e8 in dr\u00ea. Chon dot\u00e2 d\u00e8 r\u00e9jon \u00e8 d\u00e8 konhyinthe \u00e8 d\u00eavon ch\u00e8 konport\u00e2 l\u00e8 j\u2019on-l\u00e8 j\u2019\u00f4tro din on \u00e8chpri d\u00e8 frat\u00e8rnit\u00e2.", + "metadata": { + "languages": [ + "ita" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "b47382b7a0e0afd209aa7e1993565391", + "text": "Occitan (Francoproven\u00e7al, Savoie) Tu luz \u00f2m\u00f2 vinyon u mondo, libr\u00f2, tu t\u00f2ton p\u00e8 le\u00fb dinyit\u00f2 \u00e8 le\u00fb dr\u00e8ye. 
Y\u2019on tu d\u2019\u00e9m\u00f2 \u00e8 d\u00e8 konhyinhi \u00e8 i d\u00e8von f\u00e8- mouh\u00f2 d\u00e8 frat\u00e8rnit\u00f2 aou\u00e8y luz \u00f2tri.", + "metadata": { + "languages": [ + "ita", + "cat" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "da6df9434bcea33fdb84c07309f23605", + "text": "Occitan (Francoproven\u00e7al, Valais) Tui l\u00e8 j\u00eatre humain n\u00e9chon libro \u00e8 pary in degnet\u00e2 \u00e9 in drou\u00ea. Chon reijon\u00e2bl\u00f3 \u00e8 d\u00e8 counchieince \u00e8 deivouon \u00e2zic l\u00e8 j\u2019oun vi j\u2019avi di j\u2019\u00e2tr\u00f3 in p\u00e8r oun espri d\u00e8 frat\u00e8rnit\u00e2", + "metadata": { + "languages": [ + "fra", + "ita" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "4be88083cf737cac6ec1b39afb2513c5", + "text": "Occitan (Francoproven\u00e7al, Vaud) T\u00ee l\u00e8 z\u2019\u00eetre humain v\u00eegnant \u00e2o mondo libro et par\u00e2i dein la dignit\u00e2 et l\u00e8 dr\u00e2i. 
L\u2019ant re\u00e7u r\u00e9son et concheince et d\u00e2ivant vivre l\u00e8 z\u2019on avou\u00e9 l\u00e8 z\u2019autro quemet se sant fr\u00e2re et ch\u00e8ra.", + "metadata": { + "languages": [ + "fra" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "ca97829bba2e332be352861c0d0e0c70", + "text": "Occitan (Languedocien) Totes los \u00e8ssers umans naisson liures e egals en dignitat e en dreches. Son dotats de rason e de consci\u00e9ncia e se devon comportar los unes amb los autres dins un esperit de fraternitat.", + "metadata": { + "languages": [ + "cat", + "fra", + "spa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "2c541386adb644071a67fa19c80d221f", + "text": "Ojibwa, Northwestern \u146d\u1472\u14c7\u140c\u14c0\u14d0 \u1472\u1431\u14aa\u144e\u14ef\u1417\u1466 \u14c2\u1455\u140e\u146d\u1417\u1483 \u144e\u142f\u14c2\u14a5\u144e\u14f1\u140e\u14c2\u1483 \u14a5\u14c7 \u1455\u1431\u1455 \u146d\u148b\u1403\u14c0\u1455\u146f\u14ef\u140e\u14d0 \u1472\u1526 \u144c\u1438\u146b\u1455\u146f\u14ef\u140e\u14d0. 
\u1405\u1455\u1526\u14c7\u1417 \u14a5\u1472\u140e\u140e\u14d0 \u1472\u1526 \u14c2\u1444\u1472\u140e\u14d0 \u14a5\u14c7\u1417 \u1455\u1525 \u148b\u1403\u1511\u1472\u14c7\u1417\u1438\u144e\u1417\u1438\u14d0 \u140a\u1490\u146f \u14a5\u14c4\u140e\u148b\u140e\u144e\u140e\u14c2\u1483.", + "metadata": { + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "UncategorizedText", + "element_id": "1770d7b5d51d295d22599366d8285ccc", + "text": "Okiek Piik togol kosigotiik en katiagetapkei koguyet ak imandanyuwan koyuyosin togol kogigigochi ngomnotet ak koperuret en iyon konyolu koyochigei oteptop tupchondit.", + "metadata": { + "languages": [ + "est", + "tgl", + "hun", + "afr" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "838854e8c37bc2424bd4b8b4324da0a4", + "text": "Orok \u0427\u0438\u043f\u0430\u0304\u043b\u0438 \u0433\u0443\u0440\u0443\u043d\u043d\u0435\u0304 \u0431\u0430\u043b\u04e1\u0438\u0447\u0438 \u0433\u044d\u0432\u0443\u043c\u044d, \u043e\u043c\u043e\u0442\u0442\u043e \u043c\u044d\u0304\u043d\u044d \u043c\u04e9\u0440\u04e9\u043d\u04e1\u0438, \u043c\u044d\u0304\u043d\u044d \u0434\u043e\u0440\u043e\u043d\u04e1\u0438. 
\u041d\u043e\u0304\u0447\u0438 \u0438\u0434\u044d\u043b\u0443, \u0438\u0440\u043a\u0430\u043b\u0443, \u043c\u044d\u0304\u043d\u044d \u043c\u044d\u0304\u043d\u04e1\u0438 \u043d\u0430\u0304\u0434\u0430\u043a\u0442\u0430\u04c8\u0430\u0447\u0438 \u0431\u0458\u04e3\u0447\u0438.", + "metadata": { + "languages": [ + "rus" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "58f4dadcdcd7410be6d204f2287e31c4", + "text": "Oromo, Borana-Arsi-Guji Namooti hundinuu birmaduu ta'anii mirgaa fi ulfinaanis wal-qixxee ta'anii dhalatan. Sammuu fi qalbii ittiin yaadan waan uumamaan kennameef, hafuura obbolummaatiin walii-wajjin jiraachuu qabu.", + "metadata": { + "languages": [ + "som" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "94e7fb62cfa3b7bce4161724caed0203", + "text": "Oroqen Beyel bambur zhiyu bishi, zhunyan-du bineken chuanli-du bambur pingdeng bishi. 
Nugartin lishing bineken liangshin bishi, akin nekun guanshi-ngi chingshen-du-in duidai-meet-ki-tin.", + "metadata": { + "languages": [ + "ind" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "61b9c386f4d7f982e217e8a0973deae9", + "text": "Osetin \u0410\u0434\u04d5\u0439\u043c\u04d5\u0433\u0442\u04d5 \u0441\u0435 '\u043f\u043f\u04d5\u0442 \u0434\u04d5\u0440 \u0440\u0430\u0439\u0433\u0443\u044b\u0440\u044b\u043d\u0446 \u0441\u04d5\u0440\u0438\u0431\u0430\u0440\u04d5\u0439 \u04d5\u043c\u04d5 \u04d5\u043c\u0445\u0443\u044b\u0437\u043e\u043d\u04d5\u0439 \u0441\u04d5 \u0431\u0430\u0440\u0442\u044b. \u0423\u044b\u0434\u043e\u043d \u04d5\u0445\u0445\u04d5\u0441\u0442 \u0441\u0442\u044b \u0437\u043e\u043d\u0434 \u04d5\u043c\u04d5 \u043d\u0430\u043c\u044b\u0441\u04d5\u0439, \u04d5\u043c\u04d5 \u043a\u04d5\u0440\u04d5\u0434\u0437\u0438\u0439\u04d5\u043d \u0445\u044a\u0443\u0430\u043c\u04d5 \u0443\u043e\u0439 \u04d5\u0444\u0441\u044b\u043c\u04d5\u0440\u0442\u044b \u0445\u0443\u044b\u0437\u04d5\u043d.", + "metadata": { + "languages": [ + "rus" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "f829c47775b5845587447d35b6b41e40", + "text": "Otomi, Mezquital Gotho nu kja'ni i mu\u0331i ra zoo i gotho ro kuchti, i tu'ni nu ro \u00f1a pad\u00e4 bini i da budi, da mu\u0331i ra zoo koyu gotho yu kja'ni i yo kuadi.", + "metadata": { + "languages": [ + "hrv", + "swa", + "cym" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": 
"/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "a397f19eefe134a148362c0ba710828f", + "text": "Otuho lsiuni aati dang iko ahodc hade ihaniere erre boo ve isi orrijori dang to nelotulo. Owoni isi iko negigilita bwo ve iko ataja. Ongida isi ihanie awatek hosi ihwo elarak.", + "metadata": { + "languages": [ + "slv", + "est", + "ind", + "tgl" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "dd2ab495e062b9a11fe24355a3c1319e", + "text": "P\u00e1ez Ya'nwe'wewa'te' maa nasapa ha'dacehk hi'pku up'hi', w\u00ebtte u'huwa'hi'pta', eena' eena' f'i'zewa' hi'pta', \u00fcus hi'pta' d'ik'the hi'pta' naapa'kate. Sa' h'ukaysa \u00fcus hi'pcehktha'w sa' pyakhna'we f'i'ze hi'ptha'w.", + "metadata": { + "languages": [ + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "b4f294da67df35573403a536b2853dd4", + "text": "Palauan A rogui 'l chad el mechell a ngarngii a ilmokl er tir ra diosisiu el llemalt. 
Ngarngii er tir a uldesuir mete mo meruul el mo rar bebil lokiu a ungil 'l omeruul ra klauchad.", + "metadata": { + "languages": [ + "ron", + "cat", + "ita", + "est" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "ffd211be4b0b2eabd1a1792cc2b8afa1", + "text": "Pampangan Ding sablang tau mibait lang malaya at pante-pante king karangalan at karapatan. Ila mipagkaluban lang katuliran at konsensiya ay dapat misaupan king diwang pamikapatiran.", + "metadata": { + "languages": [ + "tgl" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "UncategorizedText", + "element_id": "068d755c0e132506c2d31786a7ed4b32", + "text": "Panjabi, Eastern \u0a38\u0a3e\u0a30\u0a3e \u0a2e\u0a28\u0a41\u0a71\u0a16\u0a40 \u0a2a\u0a30\u0a3f\u0a35\u0a3e\u0a30 \u0a06\u0a2a\u0a23\u0a40 \u0a2e\u0a39\u0a3f\u0a2e\u0a3e, \u0a36\u0a3e\u0a28 \u0a05\u0a24\u0a47 \u0a39\u0a71\u0a15\u0a3e\u0a02 \u0a26\u0a47 \u0a2a\u0a71\u0a16\u0a4b\u0a02 \u0a1c\u0a28\u0a2e \u0a24\u0a4b\u0a02 \u0a39\u0a40 \u0a06\u0a5b\u0a3e\u0a26 \u0a39\u0a48 \u0a05\u0a24\u0a47 \u0a38\u0a41\u0a24\u0a47 \u0a38\u0a3f\u0a71\u0a27 \u0a38\u0a3e\u0a30\u0a47 \u0a32\u0a4b\u0a15 \u0a2c\u0a30\u0a3e\u0a2c\u0a30 \u0a39\u0a28 \u0964 \u0a09\u0a28\u0a4d\u0a39\u0a3e\u0a02 \u0a38\u0a2d\u0a28\u0a3e \u0a28\u0a42\u0a70 \u0a24\u0a30\u0a15 \u0a05\u0a24\u0a47 \u0a5b\u0a2e\u0a40\u0a30 \u0a26\u0a40 \u0a38\u0a4c\u0a17\u0a3e\u0a24 \u0a2e\u0a3f\u0a32\u0a40 \u0a39\u0a4b\u0a08 \u0a39\u0a48 \u0a05\u0a24\u0a47 \u0a09\u0a28\u0a4d\u0a39\u0a3e\u0a02 \u0a28\u0a42\u0a70 
\u0a2d\u0a30\u0a3e\u0a24\u0a30\u0a40\u0a2d\u0a3e\u0a35 \u0a26\u0a40 \u0a2d\u0a3e\u0a35\u0a28\u0a3e \u0a30\u0a16\u0a26\u0a3f\u0a06\u0a02 \u0a06\u0a2a\u0a38 \u0a35\u0a3f\u0a1a \u0a35\u0a3f\u0a1a\u0a30\u0a23\u0a3e \u0a1a\u0a3e\u0a39\u0a40\u0a26\u0a3e \u0a39\u0a48 \u0964", + "metadata": { + "languages": [ + "pan" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "UncategorizedText", + "element_id": "e81229801afdd767a6ca59c9877783bc", + "text": "Panjabi, Western \u0633\u0627\u0631\u06d2 \u0627\u0646\u0633\u0627\u0646 \u0622\u0632\u0627\u062f \u062a\u06d2 \u062d\u0642\u0648\u0642 \u062a\u06d2 \u0639\u0632\u062a \u062f\u06d2 \u0644\u062d\u0627\u0638 \u0646\u0627\u0644 \u0628\u0631\u0627\u0628\u0631 \u067e\u06cc\u062f\u0627 \u06c1\u0648\u0646\u062f\u06d2 \u0646\u06cc\u06ba \u06d4 \u06d4 \u0627\u0648\u06c1 \u0639\u0642\u0644 \u0633\u0645\u062c\u06be \u062a\u06d2 \u0686\u0646\u06af\u06d2 \u0645\u0646\u062f\u06d2 \u062f\u06cc \u067e\u0686\u06be\u0627\u0646 \u062a\u06d2 \u0627\u062d\u0633\u0627\u0633 \u0631\u06a9\u06be\u062f\u06d2 \u0646\u06d2 \u0627\u06cc\u0633 \u0648\u0627\u0633\u0637\u06d2 \u0627\u0648\u06c1\u0646\u0627\u06ba \u0646\u0648\u06ba \u0627\u06a9 \u062f\u0648\u062c\u06d2 \u0646\u0627\u0644 \u0628\u06be\u0627\u0626\u06cc \u0686\u0627\u0631\u06d2 \u0648\u0627\u0644\u0627 \u0633\u0644\u0648\u06a9 \u06a9\u0631\u0646\u0627 \u0686\u0627\u06c1\u06cc \u062f\u0627 \u0627\u06d2 \u06d4 \u06d4", + "metadata": { + "languages": [ + "urd" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": 
"4f3aebd4854cd6b0308eca4661657a32", + "text": "Papiamentu Tur ser humano ta nace liber y igual den dignidad y den derecho. Nan ta dota cu rason y cu consenshi y nan mester comporta nan den spirito di fraternidad pa cu otro.", + "metadata": { + "languages": [ + "spa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "UncategorizedText", + "element_id": "a2c1dda9330915ecdfba4af7c21da5c0", + "text": "Pashto, Northern \u062f \u0628\u0634\u0631 \u067c\u0648\u0644 \u0627\u0641\u0631\u0627\u062f \u0627\u0632\u0627\u062f \u0646\u0693\u06cd \u062a\u0647 \u0631\u0627\u0681\u064a \u0627\u0648 \u062f \u062d\u064a\u062b\u064a\u062a \u0627\u0648 \u062f \u062d\u0642\u0648\u0642\u0648 \u0644\u0647 \u067e\u0644\u0648\u0647 \u0633\u0631\u0647 \u0628\u0631\u0627\u0628\u0631 \u062f\u064a\u06d4 \u067c\u0648\u0644 \u062f \u0639\u0642\u0644 \u0627\u0648 \u0648\u062c\u062f\u0627\u0646 \u062e\u0627\u0648\u0646\u062f\u0627\u0646 \u062f\u064a \u0627\u0648 \u0628\u0627\u064a\u062f \u064a\u0648 \u0644\u0647 \u0628\u0644 \u0633\u0631\u0647 \u062f \u0648\u0631\u0648\u0631\u06cd \u067e\u0647 \u0631\u0648\u062d\u064a\u0647 \u0633\u0631\u0647 \u0686\u0644\u0646\u0646\u062f \u06a9\u0693\u064a\u06d4", + "metadata": { + "languages": [ + "fas" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "7e9ad6a402b6252e85be01ffafa1eb5e", + "text": "Picard Tos l\u00e8s-omes vin\u00e8t \u00e5 monde l\u00eebes \u00e8t \u00e9g\u00e5ls po \u00e7ou qu'\u00e8st d' le\u00fb dignit\u00e9 \u00e8t d' le\u00fbs dre\u00fbts. 
Le\u00fb re\u030azon \u00e8t le\u00fb consyince elz\u00ee fe\u030at on d'vw\u00e9r di s'kid\u00fbre inte di z\u00e8le come d\u00e8s fr\u00e8s", + "metadata": { + "languages": [ + "fra" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "323d61680164a581d21b57bdcbf0d26e", + "text": "Pidgin, Nigerian Everi human being, naim dem born free and dem de equal for dignity and di rights wey we get, as human beings, God come give us beta sense wey we de take tink well, well and beta mind, sake for dis, we must to treat each other like broda and sister.", + "metadata": { + "languages": [ + "eng", + "afr" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "ac876cc3346916765112fe76163aaf80", + "text": "Pijin Evri man en mere olketa born frii en ikwol lo digniti en raits blo olketa. Olketa evriwan olketa garem maeni fo tingting en olketa sapos fo treatim isada wittim spirit blo bradahood.", + "metadata": { + "languages": [ + "nor" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "47fc36adefb94fbc8ce77cce0ef3cd95", + "text": "Pintupi-Luritja Nganana maru tjuta, tjulkura tjuta, manta yurungka parrari nyinapayi tjutanya liipulala nyinanyi, nganana yanangu maru tjuta wiya kuyakuya. Yuwankarrangkuya palya nintingku kulini. 
Tjanaya palya kutjupa tjutaku tjukarurru nyinanytjaku, walytja tjuta nguwanpa, mingarrtjuwiya. Tjungungku palyangku kurrunpa kutjungku.Wangka ngaangku nganananya tjakultjunanyi rapa ngaranytjaku kutjupa tjuta nguwanpa.", + "metadata": { + "languages": [ + "ind", + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "UncategorizedText", + "element_id": "b3fdb7f8753265489381d2f45c0d5b0a", + "text": "Pipil Muchi ne tay gen tu weyga nestiwit tamagixti genga tik ekneliat wan ipal wan gichiwtiwit ipal ma munegigan ne se pal ne se.", + "metadata": { + "languages": [ + "tgl", + "ind" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "ec8e1439bfe9914fdac211b8f24455dd", + "text": "Pohnpeian Tohn sampa karos ipwiwei nan saledek oh duwepenehte nan arail wasa oh arail pwung. Arail marain oh pehm ih utakerail kahrehda korusie konehng sawaspene nin duwen pirien ehu.", + "metadata": { + "languages": [ + "ind" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "cad1fbc2c59a2ab610912476278d0204", + "text": "Polish Wszyscy ludzie rodz\u0105 si\u0119 wolni i r\u00f3wni pod wzgl\u0119dem swej godno\u015bci i swych praw. 
S\u0105 oni obdarzeni rozumem i sumieniem i powinni post\u0119powa\u0107 wobec innych w duchu braterstwa.", + "metadata": { + "languages": [ + "pol" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "07022bc1c3bb5010208399375dc1b813", + "text": "Portuguese (Brazil) Todos os seres humanos nascem livres e iguais em dignidade e direitos. S\u00e3o dotados de raz\u00e3o e consci\u00eancia e devem agir em rela\u00e7\u00e3o uns aos outros com esp\u00edrito de fraternidade.", + "metadata": { + "languages": [ + "por" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "7925a3ec12f3766bebb236e3ec5bdc60", + "text": "Portuguese (Portugal) Todos os seres humanos nascem livres e iguais em dignidade e em direitos. Dotados de raz\u00e3o e de consci\u00eancia, devem agir uns para com os outros em esp\u00edrito de fraternidade.", + "metadata": { + "languages": [ + "por" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "4db9c27acd6b5a924d0ac2dde81f03ac", + "text": "Pular NEDDHANKE EN FOW DYIBINTE NO HETTII NO FOTA E DHI FOW, E NDIMU E HANDANDHI. 
BHE DYIBINDINTE E HAGGHIL E FAAMU ; HIBHE HAANI DYOGONDIRDE E NDER HAGGHIL NEENEGOOTAANKAAKU.", + "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "dc4348bae7eccbd8e30af1763958fee9", + "text": "Pular (Adlam) \ud83a\udd0b\ud83a\udd32\ud83a\udd46\ud83a\udd22\ud83a\udd25\ud83a\udd22 \ud83a\udd22\ud83a\udd44\ud83a\udd23\ud83a\udd2b\ud83a\udd45\ud83a\udd36\ud83a\udd2d \ud83a\udd2c\ud83a\udd2e\ud83a\udd2c \ud83a\udd28\ud83a\udd2e\ud83a\udd3c\ud83a\udd2d\u060c \ud83a\udd32'\ud83a\udd23\ud83a\udd2d\ud83a\udd25\ud83a\udd2f\ud83a\udd2d\ud83a\udd23\ud83a\udd2d \ud83a\udd2b \ud83a\udd36\ud83a\udd2d\ud83a\udd26\ud83a\udd2d\ud83a\udd32\ud83a\udd22\ud83a\udd32\ud83a\udd46\ud83a\udd23\ud83a\udd2b \ud83a\udd3c\ud83a\udd2e \ud83a\udd26\ud83a\udd22\ud83a\udd32\ud83a\udd46\ud83a\udd3a\ud83a\udd2b \ud83a\udd38\ud83a\udd22\ud83a\udd33\ud83a\udd46\ud83a\udd2b\ud83a\udd45\ud83a\udd36\ud83a\udd2d. 
\ud83a\udd09\ud83a\udd29\ud83a\udd2b \ud83a\udd32'\ud83a\udd3a\ud83a\udd2e\ud83a\udd45\ud83a\udd23\ud83a\udd2d \ud83a\udd25\ud83a\udd2d\ud83a\udd45\ud83a\udd36\ud83a\udd2e \ud83a\udd2b \ud83a\udd38\ud83a\udd22\ud83a\udd33\ud83a\udd46\ud83a\udd2d\ud83a\udd24\ud83a\udd22\ud83a\udd32\ud83a\udd3c\ud83a\udd22\ud83a\udd44\ud83a\udd3a\ud83a\udd22\ud83a\udd24 \ud83a\udd2b\ud83a\udd3c\ud83a\udd2b \ud83a\udd2b\ud83a\udd29\ud83a\udd2b \ud83a\udd28\ud83a\udd2e\ud83a\udd3c\ud83a\udd2d \ud83a\udd38\ud83a\udd35\ud83a\udd45\ud83a\udd2c\ud83a\udd2e \ud83a\udd32'\ud83a\udd23\ud83a\udd2d\ud83a\udd2a\ud83a\udd23\ud83a\udd2b \ud83a\udd2b \ud83a\udd32'\ud83a\udd23\ud83a\udd2b\ud83a\udd2a \ud83a\udd29 \ud83a\udd2d\ud83a\udd34\ud83a\udd32\ud83a\udd3a\ud83a\udd35\ud83a\udd34\ud83a\udd35\ud83a\udd25\ud83a\udd46\ud83a\udd22\ud83a\udd44\ud83a\udd3a\ud83a\udd35.", + "metadata": { + "languages": [ + "ara" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "9c7d0e713be2017eba040780765856df", + "text": "Purepecha Iamendu k'uiripuecha janguarhiparini ka majku jarhati ka jurhimbekuecha jingoni kueraa\u014basondikso ka, juajtakuarhis\u00efndiks\u00ef ambakiti eratsekua ka kaxumbikua, jatsistiks\u00ef eskaks\u00ef sesi arhijperaaka.", + "metadata": { + "languages": [ + "est", + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "UncategorizedText", + "element_id": "9478aa88ff2306d5f2acc6b92e055546", + "text": "Q'eqchi' Chijunil li poyanam juntaq'eet wankil xloq'al naq nake'yo'la, ut kama' ak reheb' naq wan xna'leb'eb ut nake'reek'a 
rib', tento naq te'xk'am rib' sa' usilal chirib'ilrib'eb'.", + "metadata": { + "languages": [ + "ind", + "swa", + "som", + "cat", + "est" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "e7cb3a61bb828a46ce008b4251df5ef3", + "text": "Quechua, Ambo-Pasco Lapan runa kay pachach'u yurin libri kawananpaq, lapanchinuy iwal respetasha kananpaqmi, mana pipis jarup\u00e4nanpaq, lapanpis iwal yarpach'akuy yach'aqmi, alita mana alita tantiyar kawananpaq. Chaynuy runa masinwan juknin jukninwan kuyanakur kap\u00e4kuchun", + "metadata": { + "languages": [ + "tgl", + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "7af8d8dd7e7418eed6057bb221448506", + "text": "Quechua, Arequipa-La Uni\u00f3n Kanmi derechonchiskuna llapanchispa, nacesqanchismanta. Kantaqmi llapanchispa runa kayninchis. Manan runa kanchu manay derechoyoq. Huk runaq derecho hukpawan kaqllan kan. Kanmi derechonchis llapanchispa allin kawsay libre tiyananchispaq. Llapan runaqpan kan yuyayninchis yachanapaq. 
Llapanchis kasun llapa runa masinchiskunawan munanakunapaq, huk ayllu hina.", + "metadata": { + "languages": [ + "tgl", + "ind", + "swa", + "som" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "d4c2a1e138b9d930e777cdaf26a52733", + "text": "Quechua, Ayacucho Lliw runakunam nacesqanchikmantapacha libre kanchik, lliw derechonchikpipas iguallataqmi kanchik. Yuyayniyoq kasqanchikraykum hawkalla aylluntin hina kawsayta debenchik llapa runakunawan.", + "metadata": { + "languages": [ + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "f4bb6dc9e8533755a35983f62fd63e34", + "text": "Quechua, Cajamarca Yumbay ollqokuna, warmikuna pullalla kashun leyninchiqkunawan. Manam ni pipapis kriyadunchu kanchiqllapa. Suqninchiq, suqninchiq atinchiqllapa yuyayta \"imam alli, imam mana allichu\" nishpa. Chayshina kaptin, shumaqta tiyashunllapa suq ayllushinalla.", + "metadata": { + "languages": [ + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "9cd272d47101a5545b07fa98899d9f70", + "text": "Quechua, Cusco Llapa runan kay pachapi paqarin qispisqa, \"libre\" flisqa, allin kausaypi, chaninchasqa kausaypi kananpaq, yuyayniyoq, yachayniyoq runa kasqanman jina. 
Llapa runamasinwantaqmi wauqentin jina munanakunan.", + "metadata": { + "languages": [ + "swa", + "tgl", + "som" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "7838a28da590ff7bb2ea5c7a48ba93fc", + "text": "Quechua, Huamal\u00edes-Dos de Mayo Hu\u00e1nuco Lapan runakunapis yurikuyan librimi y wakinkaqkunanaw rispitashqa, mana jarukushqa kay\u00e4nanpaq. Saynawmi runakunaqa yuriyan shumaq yarpayyuq, alitapis mana alitapis reqiykar y seqay kuyap\u00e4kuyyuq. Saymi runakuna ali kawakuy\u00e4nan jukninwan jukninwanpis.", + "metadata": { + "languages": [ + "swa", + "som", + "tgl" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "08720fc9c770f44e38435bc27b49867d", + "text": "Quechua, Huaylas Ancash Meyqan nunapis manam pipa sirweqnin nuna kananpaqtsu yurikushqa. I nuna karninmi meyqan nunapis juk l\u00e1yatsu kayanman der\u00ebchunkunachowpis. I yarpachakiyta yacharninmi i allita mana allita shonqonkunachow m\u00e1kurninmi nunakuna jukninta wiyanakur kayanman.", + "metadata": { + "languages": [ + "ind", + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "34a8df5528e399552e033b89176957b0", + "text": "Quechua, Margos-Yarowilca-Lauricocha Lapantsikunapis Iibrimi yurishqantsi. 
B\u00e4lintsimi y der\u00ebchuntsikunapis wakinkaqkunanoqlapami. Yarpaynintsikunapis kaykanmi runa mayintsikunawan juk wawqinoq kuyanakur kawap\u00e4kunantsipaq.", + "metadata": { + "languages": [ + "ind", + "tgl", + "fin", + "est" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "68b63eb96c576d943261ea39555162be", + "text": "Quechua, Northern Conchucos Ancash Mayqan runapis manam pipa isklabun kananpaqtsu yurishqa. Y runa karninmi llapan runakuna iwal kayanman dirichunkunachawpis. Y yarpayta yacharninmi y allita mana allita shunqunkunachaw makurninmi runakuna huknin hukninta rispitanakur kayanman.", + "metadata": { + "languages": [ + "ind", + "tgl", + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "ecc5d074ce9be67e187d19b4aabf87c5", + "text": "Quechua, North Jun\u00edn Lapan runas kay pachachru nasimun juk rantisha runanuy mana pitas sirbinanpaqmi, alipa rikasha kananpaqmi, washasha kananpaqmi. Lapan runakunas nasipaakamun yarpayniyoqmi naatan tantiyayniyoqmi ima lutanta rurapaakurursi tantiyakunanpaq. 
Lapan runakunas kawapaakunaman juk wawqenuylam.", + "metadata": { + "languages": [ + "tgl", + "ind", + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "af8bad9d59da3dc7cc6e613e743d2e7f", + "text": "Quechua, South Bolivian Tukuy kay pachaman paqarimujkuna libres nasekuntu tukuypunitaj kikin obligacionesniycjllataj, jinakamalla honorniyojtaj atiyniyojtaj, chantaqa razonwantaj concienciawantaj dotasqa kasqankurayku, kawsaqe masipura jina, tukuy uj munakuyllapi kawsakunanku tian.", + "metadata": { + "languages": [ + "ind", + "swa", + "tgl", + "som" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "UncategorizedText", + "element_id": "654791ed821f84e420d3742634a53e7c", + "text": "Quechua (Unified Quichua, old Hispanic orthography) Tucuy runacuna quishpirihu\u00e1n hui\u00f1\u00e1n, pactacunahuampes, pay pura, umahu\u00e1n, ayahu\u00e1n chay shucuna shina, chaymantami shuclla shina causangacuna.", + "metadata": { + "languages": [ + "spa", + "eng" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "fed959145c5a6c3d8da63241f6de77c5", + "text": "Quichua, Chimborazo Highland Tukuy runakunami maypipash kishpirishka, sumaykaypi(dignidad) paktapakta wacharin. 
Chay wawakunaka sumak yuyaykuna, tiksiyuyay (fundamental), huntami kan; chaymantami runapuraka shukllashina tukushpa, yanaparishpa kawsana kan.", + "metadata": { + "languages": [ + "ind", + "swa", + "tgl" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "8dc5fe5e7e5f4841e4a057d528ffb483", + "text": "Rarotongan Kua anau rangatira ia te tangata katoatoa ma te aiteite i te au tikaanga e te tu ngateitei tiratiratu. Kua ki ia ratou e te mero kimi ravenga e te akavangakau e kia akono tetai i tetai, i roto i te vaerua piri anga taeake.", + "metadata": { + "languages": [ + "ind", + "sqi" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "f0f216272ee0f7e11e21eb4ca1752777", + "text": "Romagnolo Tot j ess\u00e8ri um\u00e8n i n\u00e0s l\u00e9bri e cumpagn in dignit\u00e0 e dir\u00e9t. Lou i \u00e8 dutid ad rasoun e ad cuscinza e i \u00e0 da oper\u00e8, ognun ti cunfrunt at ch'j ilt, sa sentimint ad fratel\u00e8nza.", + "metadata": { + "languages": [ + "ita", + "cat" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "a84b6ff398b4f815054e7b47107ce163", + "text": "Romani, Balkan Savorre manu\u015ba biand\u00f5n meste thaj barabar k-o demnipen aj k-e hakaja. 
Si len godi aj somzanipen thaj si len te tr\u0105den pen jekh karing o aver and-o vogi e phralimnasqoro.", + "metadata": { + "languages": [ + "slv", + "swe" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "dd72113ef6db4b69482adf28078a6090", + "text": "Romani, Balkan (1) Sa e manu\u0161ikane strukture bijand\u017eona tromane thaj jekhutne ko digniteti thaj \u010dapipa. Von si baxtarde em barvale gndaja thaj god\u017eaja thaj trubun jekh avereja te kherjakeren ko vod\u017ei pralipaja.", + "metadata": { + "languages": [ + "slv" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "d1d78e5ce9c3fe2071093b3f74f8f9b8", + "text": "Romanian (1953) Toate fiin\u021bele umane se nasc libere \u0219i egale \u00een demnitate \u0219i \u00een drepturi. Ele s\u00eent \u00eenzestrate cu ra\u021biune \u0219i con\u0219tiin\u021b\u0103 \u0219i trebuie s\u0103 se comporte unele fa\u021b\u0103 de altele \u00een spiritul fraternit\u0103\u021bii.", + "metadata": { + "languages": [ + "ron" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "ffd7f486f85cc12fffdee64c8dc1c47c", + "text": "Romanian (1993) Toate fiin\u021bele umane se nasc libere \u0219i egale \u00een demnitate \u0219i \u00een drepturi. 
Ele sunt \u00eenzestrate cu ra\u021biune \u0219i con\u0219tiin\u021b\u0103 \u0219i trebuie s\u0103 se comporte unele fa\u021b\u0103 de altele \u00een spiritul fraternit\u0103\u021bii.", + "metadata": { + "languages": [ + "ron" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "81db31b50da57a040bad82d9af2297df", + "text": "Romanian (2006) Toate fiin\u021bele umane se nasc libere \u0219i egale \u00een demnitate \u0219i \u00een drepturi. Ele sunt \u00eenzestrate cu ra\u021biune \u0219i con\u0219tiin\u021b\u0103 \u0219i trebuie s\u0103 se comporte unele fa\u021b\u0103 de altele \u00een spiritul fraternit\u0103\u021bii.", + "metadata": { + "languages": [ + "ron" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "cadc80db78bd586f5f18217272cfdb17", + "text": "Romansch Tuots umans naschan libers ed eguals in dignit\u00e0 e drets. Els sun dotats cun intellet e conscienza e dessan agir tanter per in uin spiert da fraternit\u00e0.", + "metadata": { + "languages": [ + "cat", + "ita" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "4295c14118d555a1bd3be37701a4578e", + "text": "Romansch (Grischun) Tut ils umans naschan libers ed eguals en dignitad ed en dretgs. 
Els \u00e8n dotads cun raschun e conscienza e duain agir in vers l\u2019auter en spiert da fraternitad.", + "metadata": { + "languages": [ + "deu", + "cat" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "d7c3646cc8bf5af91fa007bcdc86ad53", + "text": "Romansch (Puter) Tuot ils umauns naschan libers ed eguels in dignited ed in drets. Els sun dotos cun radschun e conscienza e dessan agir \u00fcn invers l\u2019oter in spiert da fraternited.", + "metadata": { + "languages": [ + "deu", + "cat", + "eng" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "a0daace15fe9f49d73fcdd9e3b86f001", + "text": "Romansch (Surmiran) Tot igls carstgangs neschan libers ed eguals an dignitad ed an dretgs. Els \u00e8n dotos cun raschung e schientscha e duessan ager l\u2019egn vers l\u2019oter an spiert da fraternitad.", + "metadata": { + "languages": [ + "cat", + "deu", + "ita" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "57126ecde8022743581d3932507d8b63", + "text": "Romansch (Sursilvan) Tut ils humans neschan libers ed eguals en dignitad ed en dretgs. 
Els ein dotai cun raschun e cunscienzia e duein agir in viers l\u2019auter en sp\u00e9rt da fraternitad.", + "metadata": { + "languages": [ + "deu", + "nld", + "fra", + "cat", + "ita" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "82fb166f28096b77e6b865ce44135e16", + "text": "Romansch (Sutsilvan) Tut igls humans neschan libers ad eguals an dignitad ad an dretgs. Els en dotos cun rasch\u00f9n a cunzienzia a den agir egn anviers l\u2019oter an spiert da fraternitad.", + "metadata": { + "languages": [ + "cat", + "deu" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "53246b60d8dbe52f7f323cfe27507738", + "text": "Romansch (Vallader) Tuot ils umans naschan libers ed eguals in dignit\u00e0 ed in drets. Els sun dotats cun radschun e conscienza e dessan agir \u00fcn invers l\u2019oter in \u00fcn spiert da fraternit\u00e0.", + "metadata": { + "languages": [ + "cat", + "ita" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "97e1a02de327531973da6bb83feba879", + "text": "Rundi Abantu bose bavuka bishira bakizana kandi bangana mu gateka no mu ngingo zibubahiriza. 
Bafise ubwenge n'umutima kandi bategerezwa kwubahana nk'abavandimwe.", + "metadata": { + "languages": [ + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "7b1fe5da3cfa2322dd960a870a966d3a", + "text": "Russian \u0412\u0441\u0435 \u043b\u044e\u0434\u0438 \u0440\u043e\u0436\u0434\u0430\u044e\u0442\u0441\u044f \u0441\u0432\u043e\u0431\u043e\u0434\u043d\u044b\u043c\u0438 \u0438 \u0440\u0430\u0432\u043d\u044b\u043c\u0438 \u0432 \u0441\u0432\u043e\u0435\u043c \u0434\u043e\u0441\u0442\u043e\u0438\u043d\u0441\u0442\u0432\u0435 \u0438 \u043f\u0440\u0430\u0432\u0430\u0445. \u041e\u043d\u0438 \u043d\u0430\u0434\u0435\u043b\u0435\u043d\u044b \u0440\u0430\u0437\u0443\u043c\u043e\u043c \u0438 \u0441\u043e\u0432\u0435\u0441\u0442\u044c\u044e \u0438 \u0434\u043e\u043b\u0436\u043d\u044b \u043f\u043e\u0441\u0442\u0443\u043f\u0430\u0442\u044c \u0432 \u043e\u0442\u043d\u043e\u0448\u0435\u043d\u0438\u0438 \u0434\u0440\u0443\u0433 \u0434\u0440\u0443\u0433\u0430 \u0432 \u0434\u0443\u0445\u0435 \u0431\u0440\u0430\u0442\u0441\u0442\u0432\u0430.", + "metadata": { + "languages": [ + "rus" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "6bf0412e57e78aa58fbf28eb5d55ecb1", + "text": "Rwanda Abantu bose bavuka aliko bakwiye agaciro no kwubahwa kimwe. 
Bose bavukana ubwenge n'umutima, bagomba kugilirana kivandimwe.", + "metadata": { + "languages": [ + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "48332b010fe58bc794e833308da30575", + "text": "Saami, North Buot olbmot leat rieg\u00e1dan friddjan ja olmmo\u0161\u00e1rvvu ja olmmo\u0161vuoigatvuo\u0111aid d\u00e1fus. Sii leat jierbmala\u0161 olbmot geain lea oamedovdu ja sii g\u00e1lgga\u0161e leat dego vielja\u010dagat.", + "metadata": { + "languages": [ + "est", + "slv" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "UncategorizedText", + "element_id": "373656c2cab80370dd2768316c8a725e", + "text": "Salar Heme kishler h\u00fcr der, haysiyet ma haklarde adil der, mantik ma vicdan var, kardeshlikden davraneshge.", + "metadata": { + "languages": [ + "tur" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "3b9323b658ffe68e575944ba1afb73e3", + "text": "Samoan O tagata soifua uma ua saoloto lo latou fananau mai, ma e tutusa o latou tulaga aloaia faapea a latou aia tatau. 
Ua faaeeina atu i a latou le mafaufau lelei ma le loto fuatiaifo ma e tatau ona faatino le agaga faauso i le va o le tasi i le isi,", + "metadata": { + "languages": [ + "ita", + "swa", + "cat" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "ddfa143fc42a89f1e4f7b99ce0028962", + "text": "Sango Ad\u00fc \u00e2zo k\u00fb\u00ea yamba, ng\u00e2 \u00e2la l\u00eengbi ter\u00ea na l\u00eag\u00eb t\u00ee n\u00ebng\u00f6-ter\u00ea na t\u00ee \u00e2ngang\u00fc. Ala k\u00fb\u00ea awara ndar\u00e4 na b\u00f6r\u00f6-li s\u00ef \u00e2la l\u00eengbi t\u00ee dut\u00ef na \u00e2mb\u00e2 t\u00ee \u00e2la g\u00ef na l\u00eang\u00f6 s\u00f6ng\u00f6.", + "metadata": { + "languages": [ + "tgl", + "vie" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "UncategorizedText", + "element_id": "ba8456690a521bd0fb0bb757c188f302", + "text": "Sanskrit \u0938\u0930\u094d\u0935\u0947 \u092e\u093e\u0928\u0935\u093e\u0903 \u0938\u094d\u0935\u0924\u0928\u094d\u0924\u094d\u0930\u093e\u0903 \u0938\u092e\u0941\u0924\u094d\u092a\u0928\u094d\u0928\u093e\u0903 \u0935\u0930\u094d\u0924\u0928\u094d\u0924\u0947 \u0905\u092a\u093f \u091a, \u0917\u094c\u0930\u0935\u0926\u0943\u0936\u093e \u0905\u0927\u093f\u0915\u093e\u0930\u0926\u0943\u0936\u093e \u091a \u0938\u092e\u093e\u0928\u093e\u0903 \u090f\u0935 \u0935\u0930\u094d\u0924\u0928\u094d\u0924\u0947\u0964 \u090f\u0924\u0947 \u0938\u0930\u094d\u0935\u0947 \u091a\u0947\u0924\u0928\u093e-\u0924\u0930\u094d\u0915-\u0936\u0915\u094d\u0924\u093f\u092d\u094d\u092f\u093e\u0902 
\u0938\u0941\u0938\u092e\u094d\u092a\u0928\u094d\u0928\u093e\u0903 \u0938\u0928\u094d\u0924\u093f\u0964 \u0905\u092a\u093f \u091a, \u0938\u0930\u094d\u0935\u0947\u093d\u092a\u093f \u092c\u0928\u094d\u0927\u0941\u0924\u094d\u0935-\u092d\u093e\u0935\u0928\u092f\u093e \u092a\u0930\u0938\u094d\u092a\u0930\u0902 \u0935\u094d\u092f\u0935\u0939\u0930\u0928\u094d\u0924\u0941\u0964", + "metadata": { + "languages": [ + "hin" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "7013f596e8a99afdd7965ac753815ad9", + "text": "Sanskrit (Grantha) \ud804\udf38\ud804\udf30\ud804\udf4d\ud804\udf35\ud804\udf47 \ud804\udf2e\ud804\udf3e\ud804\udf28\ud804\udf35\ud804\udf3e\ud804\udf03 \ud804\udf38\ud804\udf4d\ud804\udf35\ud804\udf24\ud804\udf28\ud804\udf4d\ud804\udf24\ud804\udf4d\ud804\udf30\ud804\udf3e\ud804\udf03 \ud804\udf38\ud804\udf2e\ud804\udf41\ud804\udf24\ud804\udf4d\ud804\udf2a\ud804\udf28\ud804\udf4d\ud804\udf28\ud804\udf3e\ud804\udf03 \ud804\udf35\ud804\udf30\ud804\udf4d\ud804\udf24\ud804\udf28\ud804\udf4d\ud804\udf24\ud804\udf47 \ud804\udf05\ud804\udf2a\ud804\udf3f \ud804\udf1a, \ud804\udf17\ud804\udf4c\ud804\udf30\ud804\udf35\ud804\udf26\ud804\udf43\ud804\udf36\ud804\udf3e \ud804\udf05\ud804\udf27\ud804\udf3f\ud804\udf15\ud804\udf3e\ud804\udf30\ud804\udf26\ud804\udf43\ud804\udf36\ud804\udf3e \ud804\udf1a \ud804\udf38\ud804\udf2e\ud804\udf3e\ud804\udf28\ud804\udf3e\ud804\udf03 \ud804\udf0f\ud804\udf35 \ud804\udf35\ud804\udf30\ud804\udf4d\ud804\udf24\ud804\udf28\ud804\udf4d\ud804\udf24\ud804\udf47\u0964 \ud804\udf0f\ud804\udf24\ud804\udf47 \ud804\udf38\ud804\udf30\ud804\udf4d\ud804\udf35\ud804\udf47 
\ud804\udf1a\ud804\udf47\ud804\udf24\ud804\udf28\ud804\udf3e-\ud804\udf24\ud804\udf30\ud804\udf4d\ud804\udf15-\ud804\udf36\ud804\udf15\ud804\udf4d\ud804\udf24\ud804\udf3f\ud804\udf2d\ud804\udf4d\ud804\udf2f\ud804\udf3e\ud804\udf02 \ud804\udf38\ud804\udf41\ud804\udf38\ud804\udf2e\ud804\udf4d\ud804\udf2a\ud804\udf28\ud804\udf4d\ud804\udf28\ud804\udf3e\ud804\udf03 \ud804\udf38\ud804\udf28\ud804\udf4d\ud804\udf24\ud804\udf3f\u0964 \ud804\udf05\ud804\udf2a\ud804\udf3f \ud804\udf1a, \ud804\udf38\ud804\udf30\ud804\udf4d\ud804\udf35\ud804\udf47\ud804\udf3d\ud804\udf2a\ud804\udf3f \ud804\udf2c\ud804\udf28\ud804\udf4d\ud804\udf27\ud804\udf41\ud804\udf24\ud804\udf4d\ud804\udf35-\ud804\udf2d\ud804\udf3e\ud804\udf35\ud804\udf28\ud804\udf2f\ud804\udf3e \ud804\udf2a\ud804\udf30\ud804\udf38\ud804\udf4d\ud804\udf2a\ud804\udf30\ud804\udf02 \ud804\udf35\ud804\udf4d\ud804\udf2f\ud804\udf35\ud804\udf39\ud804\udf30\ud804\udf28\ud804\udf4d\ud804\udf24\ud804\udf41\u0964", + "metadata": { + "languages": [ + "nep" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "d9dd825f97644f9be308505d418e9ea9", + "text": "S\u00e3otomense Tudu ngu\u00ea di mundu ca nanc\u00ea livli e igual ni dignidade e ni dir\u00eatu. 
Punda nen ca pens\u00e1 e nen t\u00ea cunxensa, sel\u00e1 nen f\u00e9 tudu cu\u00e1 cu ten\u00e7\u00f3n de lum\u00f3n.", + "metadata": { + "languages": [ + "por", + "ita" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "ea94e46fedb24cbbc337bb5d30608ead", + "text": "Sardinian, Logudorese Totu sos \u00e8sseres umanos naschint l\u00ecberos e eguales in dinnidade e in deretos. Issos tenent sa resone e sa cuss\u00e8ntzia e depent operare s'unu cun s'\u00e0teru cun isp\u00ecritu de fraternidade.", + "metadata": { + "languages": [ + "cat", + "por" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "135f949e79e915feb11563f40072624d", + "text": "Saxon, Low All de Minschen s\u00fcnd frie un gliek an W\u00fc\u00fcrd un Rechten baren. Se hebbt Vernunft un een Geweten un se sch\u00fcllt sik Br\u00f6der sien.", + "metadata": { + "languages": [ + "deu" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "f82500b37b7d47fc3ce4735513427e14", + "text": "Scots Aw human sowels is born free and equal in dignity and richts. 
They are tochered wi mense and conscience and shuld guide theirsels ane til ither in a speirit o britherheid.", + "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "49685f2659217462214b13c3594d1423", + "text": "Secoya Si'apai aide'oy\u00eb kua'ye peoye kui'ne siay\u00eb'k\u00eb maka pa'iye kui'ne tutupaye koni, jaje kuasase's\u00ebtepi kuaju'i'ne peoye \u00f1ese saiye pa'iji ko\u0331kaij\u00eb yek\u00eb paireje.", + "metadata": { + "languages": [ + "sqi", + "fin" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "UncategorizedText", + "element_id": "e0ca8f739a2a274e0e30bcd509b308e2", + "text": "Seraiki \u0633\u0627\u0631\u06d2 \u0627\u0646\u0633\u0627\u0646 \u0627\u0632\u0627\u062f\u0627 \u062a\u06d2 \u062d\u0642\u0648\u0642 \u062a\u06d2 \u0639\u0632\u062a \u062f\u06d2 \u0627\u0639\u062a\u0628\u0627\u0631 \u0646\u0627\u0644 \u06c1\u06a9\u0648 \u0684\u0626\u06d2 \u067e\u06cc\u062f\u0627 \u062a\u06be\u06cc\u0646\u062f\u0646 \u06d4 \u0642\u062f\u0631\u062a \u0648\u0644\u0648\u06ba \u0627\u0646\u06c1\u0627\u06ba \u06a9\u0648\u06ba \u0639\u0642\u0644 \u062a\u06d2 \u0633\u0645\u062c\u06be \u0639\u0637\u0627 \u062a\u06be\u06cc\u0646\u062f\u06cc \u0627\u06d2 \u06d4 \u06c1\u06cc\u06ba \u06a9\u06cc\u062a\u06d2 \u06c1\u06a9 \u068b\u0648\u062c\u06be\u06d2 \u0646\u0627\u0644 \u0628\u06be\u0631\u067e\u06cc \u062f\u0627\u0633\u0644\u0648\u06a9 \u06a9\u0631\u06bb\u0627 \u0686\u0627\u06c1\u06cc \u062f\u0627 \u0627\u06d2 \u06d4", + "metadata": { + "languages": [ + "urd" + ], + 
"filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "f855b701f2717951ee7041f505936e9e", + "text": "Serbian (Cyrillic) \u0421\u0432\u0430 \u0459\u0443\u0434\u0441\u043a\u0430 \u0431\u0438\u045b\u0430 \u0440\u0430\u0452\u0430\u0458\u0443 \u0441\u0435 \u0441\u043b\u043e\u0431\u043e\u0434\u043d\u0430 \u0438 \u0458\u0435\u0434\u043d\u0430\u043a\u0430 \u0443 \u0434\u043e\u0441\u0442\u043e\u0458\u0430\u043d\u0441\u0442\u0432\u0443 \u0438 \u043f\u0440\u0430\u0432\u0438\u043c\u0430. \u041e\u043d\u0430 \u0441\u0443 \u043e\u0431\u0434\u0430\u0440\u0435\u043d\u0430 \u0440\u0430\u0437\u0443\u043c\u043e\u043c \u0438 \u0441\u0432\u0435\u0448\u045b\u0443 \u0438 \u0442\u0440\u0435\u0431\u0430 \u0458\u0435\u0434\u043d\u0438 \u043f\u0440\u0435\u043c\u0430 \u0434\u0440\u0443\u0433\u0438\u043c\u0430 \u0434\u0430 \u043f\u043e\u0441\u0442\u0443\u043f\u0430\u0458\u0443 \u0443 \u0434\u0443\u0445\u0443 \u0431\u0440\u0430\u0442\u0441\u0442\u0432\u0430.", + "metadata": { + "languages": [ + "mkd" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "1e1d32ffc1c937e2dc9b3b4e6b8a1453", + "text": "Serbian (Latin) Sva ljudska bi\u0107a ra\u0111aju se slobodna i jednaka u dostojanstvu i pravima. 
Ona su obdarena razumom i sve\u0161\u0107u i treba jedni prema drugima da postupaju u duhu bratstva.", + "metadata": { + "languages": [ + "hrv" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "6a973a162a71cdf61973afc03d10bb08", + "text": "Serer-Sine Wiin we naa \u00f1oowaa na adna, den fop mbodu no ke war na oxnu refna na den a jega o ngalaat umpi yiif um, le mbarin o me\u01adtootaa baa mbaag o \u00f1oow den fop no fog.", + "metadata": { + "languages": [ + "som", + "afr", + "tgl" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "201296ccbaf34300a62d4a087915bf84", + "text": "Seselwa Creole French Nou tou imen nou\u2019n ne dan laliberte ek legalite, dan nou dignite ek nou bann drwa. 
Nou tou nou annan kapasite pou rezonnen, e fodre nou azir anver lezot avek en lespri fraternel.", + "metadata": { + "languages": [ + "fra" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "UncategorizedText", + "element_id": "f602d39c8cf6ba79e59adce09af30f26", + "text": "Shan \u1075\u1030\u107c\u103a\u1038\u1075\u1030\u108a\u1075\u1031\u1083\u1089\u107c\u1086\u1089 \u1015\u1035\u107c\u103a\u1022\u107c\u103a\u1075\u102d\u1030\u1010\u103a\u1087\u1019\u1083\u1038\u101c\u1030\u107a\u103a\u1088\u1075\u102f\u1004\u103a\u1087\u1019\u102f\u107c\u103a\u1022\u107c\u103a\u101c\u103d\u1010\u103a\u1088\u101c\u1085\u101d\u103a\u1038\u107d\u1035\u1004\u103a\u1087\u1015\u1035\u1004\u103a\u1038\u1075\u107c\u103a \u101c\u1084\u1088 \u101e\u102f\u107c\u103a\u1087\u101c\u1086\u1088\u1022\u107c\u103a \u101c\u103d\u1010\u103a\u1088\u101c\u1085\u101d\u103a\u1038\u107d\u1035\u1004\u103a\u1087 \u1015\u1035\u1004\u103a\u1038\u1075\u107c\u103a\u104b \u1076\u101d\u103a\u107c\u1086\u1089 \u1019\u102e\u1038\u107a\u1062\u107c\u103a\u1087\u1022\u107c\u103a\u1019\u1031\u1083\u1011\u1010\u103a\u1038\u101e\u1062\u1004\u103a \u101c\u1084\u1088 \u1078\u1082\u103a\u1022\u107c\u103a\u1081\u1030\u1089\u1078\u1075\u103a\u1038\u107e\u102d\u1004\u103a\u1088\u1010\u102d\u102f\u101d\u103a\u1038\u1075\u1019\u103a \u107c\u107c\u103a\u1089\u101c\u1084\u1088 \u1011\u102f\u1075\u103a\u1087\u101d\u1086\u1089\u1078\u1082\u103a\u1015\u102e\u1088\u1022\u103d\u1075\u103a\u1087 \u107c\u103d\u1004\u103a\u1089\u1076\u1086\u1087\u1075\u107c\u103a\u101e\u1031 \u1010\u102d\u1010\u103a\u1038\u1010\u1031\u1083\u1087\u1075\u107c\u103a\u104b", + "metadata": { + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": 
"/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "b15fec7016d85b918b25ef8273accb9c", + "text": "Sharanahua Nantifin naanno rasisin cainnifoquin. Tsoan mato iscahuatiroma cuscan, -Manfin uhuunnacoinquin. Ahuua tsacatama rarama shara ninonfo ishon. Nantififain aton mapo shinantirofoquin. Ato nomuranrin chaca iyamarain sharamainqui icashon. Ascanrifiantan nantifin manifoti yorahuan tanannon icashu.", + "metadata": { + "languages": [ + "ita", + "eng", + "por" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "20e37b3914fade183f3e76b200daccbd", + "text": "Shilluk Dhanh\u00f8 b\u00ebne ba anyw\u00f8l\u00f8 e path ki b\u00e4ng, ge p\u00e4r ki yij b\u00eb\u00ebd\u00f8 geki dy\u00ebr\u00f8. g\u00efn-a dwaddi kiper gen y\u00ef gen da rumi ki b\u00eb\u00ebd\u00f8 m\u00f8 g\u00f6\u00f6g gen ki py\u00ebw akyel ga nyim\u00ebgg.", + "metadata": { + "languages": [ + "ind", + "sqi", + "tur", + "nor" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "9be888269d99ba5b9d4200b2a6d65346", + "text": "Shipibo-Conibo Jat\u00edbi joninra huetsa jonibaon yoiai ninc\u00e1resti iqui, jahueraquibi jaconmai iamaquin; jainoash jahuen queena jacon jahu\u00e9quibo ati jahuequescamabi iqui, tsonbira amayamatima iqui. Jaticashbira jascara aresti jacon shinanya iti jahuequescamabi iqui, jahuequescarainoash picota joni inonbi. 
Huestiora huestiorabora jahu\u00e9qui ati shinanya iqui; jainshon onanribique jahueratoqui jacon iqui jainoash jaconma iqui ishon. Ja copira huetsa jonibires inonbi non jato jaconharesti iqui, non huetsabi non acai quescaaquin.", + "metadata": { + "languages": [ + "cat", + "fra" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "6398e07c884e0212b6da4c16f73364fa", + "text": "Shona Vanhu vese vanoberekwa vakasununguka uyewo vakaenzana pahunhu nekodzero dzavo. Vanhu vese vanechipo chokufunga nekuziva chakaipa nechakanaka saka vanofanira kubatana nomweya wohusahwira.", + "metadata": { + "languages": [ + "swa", + "hrv" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "98765accca3aa276e32acc6ddb665f01", + "text": "Shor \u041f\u0430\u0440\u0447\u044b\u043d \u043a\u0438\u0436\u0438, \u043f\u043e \u0447\u0430\u0440\u044b\u049b\u049b\u0430 \u0442\u0443\u0493\u0447\u0430\u0434\u044b\u043f, \u0442\u0435\u04a3, \u043f\u043e\u0448 \u0442\u0443\u0493\u0447\u0430. 
\u041a\u0438\u0436\u0438\u043b\u0435\u0440 \u0441\u0430\u0493\u044b\u0448\u0442\u044b\u0493, \u0430\u049b\u0442\u044b\u0493 \u0442\u0443\u0493\u0447\u0430\u043b\u0430\u0440, \u043a\u0438\u0436\u0438\u043b\u0435\u0440\u0433\u0435 \u043f\u0430\u0448\u049b\u0430 \u043a\u0438\u0436\u0438\u043b\u0435\u0440\u0431\u0435 \u0430\u0440\u0493\u044b\u0448\u0442\u0430\u043d\u044b\u0448\u0442\u0430\u0440\u0493\u0430 \u043a\u0435\u0440\u0435\u043a.", + "metadata": { + "languages": [ + "rus" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "06b44e2713d2ab9cbfdbffecc788465a", + "text": "Shuar Aents yaj\u00e1 nunkanam ak\u00ednia asamtaish, metekrak ainiaji. Tumasha ni chichamenka tuke amiktin a\u00edniawai. Ni iniakmamuri, n\u00ed chichamejaituke aniakmamsar chichakartin a\u00edniawai. Tuma asamtai aents mash nekawar, penker metekrak, nuamtak war\u00e1 warat shiir pujusarmi tusar a\u00e1rma awai.", + "metadata": { + "languages": [ + "ind", + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "8e0cb1b65226a998ba0e2831e44dbe49", + "text": "Sidamo Manchi beetti kalaqamunni wolaphinoho. Ayirrinyunninna qoossotennino taaloho. 
Huwatanno tiiano kalaqamunni ba\u2019raarinoha ikkasinni mittu wolu ledo rodiimmate ayyaaninni hee\u2019ra noosi.", + "metadata": { + "languages": [ + "fin", + "swa", + "som" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "1129172b2baa1c40a3ab800d0d28f02b", + "text": "Sinhala \u0dc3\u0dd2\u0dba\u0dbd\u0dd4 \u0db8\u0db1\u0dd4\u0dc2\u0dca\u200d\u0dba\u0dba\u0ddd \u0db1\u0dd2\u0daf\u0dc4\u0dc3\u0dca\u0dc0 \u0d8b\u0db4\u0dad \u0dbd\u0db6\u0dcf \u0d87\u0dad. \u0d9c\u0dbb\u0dd4\u0dad\u0dca\u0dc0\u0dba\u0dd9\u0db1\u0dca \u0dc4\u0dcf \u0d85\u0dba\u0dd2\u0dad\u0dd2\u0dc0\u0dcf\u0dc3\u0dd2\u0d9a\u0db8\u0dca\u0dc0\u0dbd\u0dd2\u0db1\u0dca \u0dc3\u0db8\u0dcf\u0db1 \u0dc0\u0dd9\u0dad\u0dd2. \u0dba\u0dd4\u0d9a\u0dca\u0dad\u0dd2 \u0d85\u0dba\u0dd4\u0d9a\u0dca\u0dad\u0dd2 \u0db4\u0dd2\u0dc5\u0dd2\u0db6\u0db3 \u0dc4\u0dd0\u0d9f\u0dd3\u0db8\u0dd9\u0db1\u0dca \u0dc4\u0dcf \u0dc4\u0dd8\u0daf\u0dba \u0dc3\u0dcf\u0d9a\u0dca\u0dc2\u0dd2\u0dba\u0dd9\u0db1\u0dca \u0dba\u0dd4\u0dad\u0dca \u0d94\u0dc0\u0dd4\u0db1\u0dca, \u0d94\u0dc0\u0dd4\u0db1\u0ddc\u0dc0\u0dd4\u0db1\u0dca\u0da7 \u0dc3\u0dd0\u0dc5\u0d9a\u0dd2\u0dba \u0dba\u0dd4\u0dad\u0dca\u0dad\u0dda \u0dc3\u0dc4\u0ddd\u0daf\u0dbb\u0dad\u0dca\u0dc0\u0dba \u0db4\u0dd2\u0dc5\u0dd2\u0db6\u0db3 \u0dc4\u0dd0\u0d9f\u0dd3\u0db8\u0dd9\u0db1\u0dd2.", + "metadata": { + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "7f18ad35feab9b6f20b97d87856143c8", + "text": "Siona Sia'bai\u0331 aideo'y\u00eb goa'ye beoye gu\u0331i'ne sia'y\u00eb'qu\u00eb maca bai'ye 
gu\u0331i'ne qu\u00ebco baye co\u0331ni, ja\u0331je\u0331 goachase's\u00ebte goa'ju\u0331i'\u00f1e beoye \u00f1ese saiye bai'ji co\u0331caij\u00eb yequ\u00eb bai\u0331reje.", + "metadata": { + "languages": [ + "sqi", + "spa", + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "c82f4633a9724d1de7dfe866d1429080", + "text": "Slovak V\u0161etci \u013eudia sa rodia slobodn\u00ed a sebe rovn\u00ed , \u010do sa t\u00fdka ich dostojnosti a pr\u00e1v. S\u00fa obdaren\u00ed rozumom a maj\u00fa navz\u00e1jom jedna\u0165 v bratskom duchu.", + "metadata": { + "languages": [ + "slk" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "e35bcc47bb6a16df85ee45b9ba96ecf6", + "text": "Slovenian Vsi ljudje se rodijo svobodni in imajo enako dostojanstvo in enake pravice. 
Obdarjeni so z razumom in vestjo in bi morali ravnati drug z drugim kakor bratje.", + "metadata": { + "languages": [ + "slv" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "922133be59b9810ea4ae495847315aa2", + "text": "Somali Aadanaha dhammaantiis wuxuu dhashaa isagoo xor ah kana siman xagga sharafta iyo xuquuqada Waxaa Alle (Ilaah) siiyay aqoon iyo wacyi, waana in qof la arkaa qofka kale ula dhaqmaa si walaaltinimo ah.", + "metadata": { + "languages": [ + "som" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "5d86d8cbc9dda45558ccf60a3974e66a", + "text": "Soninke Haadama renme su saareyen \u014ba an na du-kitten \u00f1a, an nta sere komaaxu, an do soron su yan yekka dorontaaxu do taqu. Haqilen, wa sere su, a do soro kuttu nan siri terene doome kappalengaaxu kanma.", + "metadata": { + "languages": [ + "som", + "ind", + "tur" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "2254a39b8eef4c825a973c26eb9364c9", + "text": "Sorbian, Upper W\u0161itcy \u010d\u0142owjekojo su wot naroda swobodni a su jenacy po dostojnos\u0107i a prawach. 
Woni su z rozumom a sw\u011bdomjom wobdarjeni a maja mjezsobu w duchu bratrowstwa wobchad\u017ae\u0107.", + "metadata": { + "languages": [ + "pol", + "hrv" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "f6b37545577a2f9471636b40acbc5bf3", + "text": "Sotho, Northern Batho ka moka ba belegwe ba lokologile le gona ba na le seriti sa go lekana le ditokelo. Ba filwe monagano le letswalo mme ba swanet\u0161e go swarana ka moya wa bana ba mpa.", + "metadata": { + "languages": [ + "tgl", + "som", + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "4350ebc60622fe730a65cf3c01ab0656", + "text": "Sotho, Southern Batho bohle ba tswetswe ba lokolohile mme ba lekana ka botho le ditokelo. 
Ba tswetswe le monahano le letswalo mme ba tlamehile ho phedisana le ba bang ka moya wa boena.", + "metadata": { + "languages": [ + "eng", + "som", + "swa", + "ind" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "UncategorizedText", + "element_id": "51733b425e93924dbea419a28d2ee3d2", + "text": "South Azerbaijani Tu\u0308m insanlar hu\u0308r do\u0308g\u0306arlar, hak ve onur bak\u0131m\u0131ndan es\u0327it do\u0308g\u0306arlar, onlar ak\u0131l ve vicdana sahiptirler ve birbirlerine kars\u0327\u0131 kardes\u0327lik ruhu ic\u0327inde davranmal\u0131lar.", + "metadata": { + "languages": [ + "tur" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "7c2e8d871037d3d152d88dc5510cb236", + "text": "Spanish Todos los seres humanos nacen libres e iguales en dignidad y derechos y, dotados como est\u00e1n de raz\u00f3n y conciencia, deben comportarse fraternalmente los unos con los otros.", + "metadata": { + "languages": [ + "spa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "816bdd2e0af6f8cc514fe60150f4714b", + "text": "Spanish (resolution) Todos los seres humanos nacen libres e iguales en dignidad y derechos y, dotados como est\u00e1n de raz\u00f3n y conciencia, deben comportarse fraternalmente los unos con los otros.", + "metadata": { + "languages": [ + "spa" + 
], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "52f8c540d96bbacf23226e192b534cb7", + "text": "Sukuma Banhu bose bakabyalagwa na wiyabi na bakabizaga na makujo na sekge jabo jilenganilile. Banhu bose bakabizaga na masala na buhabuji; hukuyomba balidakilwa gubi na witogwa gidi bana ba myaji umo.", + "metadata": { + "languages": [ + "swa", + "tgl" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "a0574e22703ed205dfa2f5a01e2341bd", + "text": "Sunda Sakumna jalma gubrag ka alam dunya teh sifatna merdika jeung boga martabat katut hak-hak anu sarua . Maranehna dibere akal jeung hate nurani, campur-gaul jeung sasamana aya dina sumanget duduluran.", + "metadata": { + "languages": [ + "ind" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "cf93d32f84284c9d205953f2720290ba", + "text": "Susu Adamadie birin barixin\u025b e lan y\u025bt\u025bralui kui, y\u025bt\u025b kolonyi nun y\u025bt\u025b suxu kima. 
Fondoe nun faxamui na e b\u025b boresuxu kima bariboreya fanyi kui.", + "metadata": { + "languages": [ + "som", + "tur", + "ind" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "5608031dfa2172453e04237b10f7a8b6", + "text": "Swahili Watu wote wamezaliwa huru, hadhi na haki zao ni sawa. Wote wamejaliwa akili na dhamiri, hivyo yapasa watendeane kindugu.", + "metadata": { + "languages": [ + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "7051a25e3c9317ae9774432a3874e4e3", + "text": "Swati Bonkhe bantfu batalwa bakhululekile balingana ngalokufananako ngesitfunti nangemalungelo. Baphiwe ingcondvo nekucondza kanye nanembeza ngakoke bafanele batiphatse nekutsi baphatse nalabanye ngemoya webuzalwane.", + "metadata": { + "languages": [ + "tgl", + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "962be1c35a09978ec0be3e93852b6925", + "text": "Swedish Alla m\u00e4nniskor \u00e4ro f\u00f6dda fria och lika i v\u00e4rde och r\u00e4ttigheter. 
De \u00e4ro utrustade med f\u00f6rnuft och samvete och b\u00f6ra handla gentemot varandra i en anda av broderskap.", + "metadata": { + "languages": [ + "swe" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "99cf243f8a375bd33a573d5c26fcce02", + "text": "Tagalog Ang lahat ng tao'y isinilang na malaya at pantay-pantay sa karangalan at mga karapatan. Sila'y pinagkalooban ng katwiran at budhi at dapat magpalagayan ang isa't isa sa diwa ng pagkakapatiran.", + "metadata": { + "languages": [ + "tgl" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "1fbce46911c4817cf2f0bf0db19d2f32", + "text": "Tagalog (Tagalog) \u1700\u1705 \u170e\u1711\u1706\u1714 \u1705 \u1706\u1702\u170c\u1714 \u1701\u1710\u1712\u1708\u1712\u170e\u1705 \u1708 \u170b\u170e\u170c \u1700\u1706\u1714 \u1709\u1708\u1714\u1706\u170c\u1714 \u1709\u1708\u1714\u1706\u170c\u1714 \u1710 \u1703\u1707\u1705\u170e\u1708\u1714 \u1700\u1706\u1714 \u170b\u1714\u1704 \u1703\u1707\u1713\u1709\u1706\u1708\u1714\u1736 \u1710\u1712\u170e\u170c\u1714 \u1709\u1712\u1708\u1704\u1714\u1703\u170e\u1713\u170a\u1708\u1714 \u1705 \u1703\u1706\u1714\u170f\u1712\u1707\u1708\u1714 \u1700\u1706\u1714 \u170a\u1713\u1707\u1714\u1711\u1712 \u1700\u1706\u1714 \u1707\u1709\u1706\u1714 \u170b\u1704\u1714\u1709\u170e\u1704\u170c\u1708\u1714 \u1700\u1705 \u1701\u1710\u1706\u1714 \u1701\u1710 \u1710 \u1707\u1712\u170f \u1705 \u1709\u1704\u1714\u1703\u1703\u1709\u1706\u1712\u1707\u1708\u1714\u1736", + "metadata": { + "filetype": "text/plain", + 
"data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "f80202b3162be68cd2957c5c564ddc03", + "text": "Tahitian E fanauhia te t\u0101'\u0101to'ara'a o te ta'ata-tupu ma te ti'am\u0101 e te ti'amanara'a 'aifaito. Ua '\u012b te mana'o pa'ari e i te manava e ma te 'a'au taea'e 'oia ta ratou ha'a i rotop\u016b ia ratou iho, e ti'a ai;", + "metadata": { + "languages": [ + "ita" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "UncategorizedText", + "element_id": "b5b3558a1982151293ab4f2c745e943b", + "text": "Tai Dam \uaab9\uaa95\uaab8\uaa89 \uaa80\uaab1 \uaa8b\uaab4 \uaadb \uaa8e\uaab2\uaa89 \uaaae\uaaae\uaa80 \uaaa3\uaab1 \uaabb\uaaa0 \uaa81\uaab7 \uaabb\uaaac \uaabc\uaa92 \uaa95\uaab3 \uaa95\uaab1\uaa89 \uaa80\uaabe\uaa9a \uaab9\uaa8b\uaab7\uaa89 \uaa9d\uaab8\uaa89 \uaa95\uaaae\uaaa5 \uaaa9\uaabe \uaadb \uaab6\uaa94\uaa99 \uaaa0\uaab4 - \uaa8b\uaab4 \uaaac\uaaba \uaadb \uaabb\uaaa0 \uaa81\uaab7 \uaabb\uaaac \uaaa3\uaab2 \uaa81\uaaab\uaab8\uaa99 \uaa8e\uaab1\uaa89 \uaab6\uaa8e\uaaa3 \uaaa9\uaaba\uaa89 \uaab9\uaaa5\uaab8\uaa92 \uaadb \uaa80\uaabe\uaa9a \uaab9\uaaa5\uaab8\uaa92 \uaabb\uaa8a \uaa9a\uaab4\uaa99 \uaa80\uaabe\uaa9a \uaabc\uaa92 \uaab9\uaa9a\uaab7\uaa89 \uaa92\uaab2 \uaa80\uaabe\uaa9a \uaaab\uaab8\uaa80 \uaaad\uaab0\uaa80 \uaab5\uaa9d\uaa89 \uaab9\uaa8f\uaa89 \uaab9\uaaad\uaa99 \uaa92\uaab8\uaaab.", + "metadata": { + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 
+ } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "424be8d53e2447fd43a7df9c88610eb3", + "text": "Tajiki \u0422\u0430\u043c\u043e\u043c\u0438 \u043e\u0434\u0430\u043c\u043e\u043d \u043e\u0437\u043e\u0434 \u0432\u0430 \u0430\u0437 \u043b\u0438\u04b3\u043e\u0437\u0438 \u0448\u0430\u0440\u0430\u0444\u0443 \u04b3\u0443\u049b\u0443\u049b \u0431\u0430 \u04b3\u0430\u043c \u0431\u0430\u0440\u043e\u0431\u0430\u0440 \u0431\u0430 \u0434\u0443\u043d\u0451 \u043c\u0435\u043e\u044f\u043d\u0434. \u041e\u043d\u04b3\u043e \u0441\u043e\u04b3\u0438\u0431\u0438 \u0430\u049b\u043b\u0443 \u0432\u0438\u04b7\u0434\u043e\u043d\u0430\u043d\u0434 \u0432\u0430 \u0431\u043e\u044f\u0434 \u0431\u043e \u044f\u043a\u0434\u0438\u0433\u0430\u0440 \u043c\u0443\u043d\u043e\u0441\u0438\u0431\u0430\u0442\u0438 \u0431\u0430\u0440\u043e\u0434\u0430\u0440\u043e\u043d\u0430 \u0434\u043e\u0448\u0442\u0430 \u0431\u043e\u0448\u0430\u043d\u0434.", + "metadata": { + "languages": [ + "mkd", + "rus", + "bul" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "30aa2c0edeca02853a028f15110a6827", + "text": "Talysh H\u0259mm\u0259 insonon b\u0259\u015ft\u0259 l\u0259yo\u011f\u0259ti iy\u0259n h\u0259xonro ozod iy\u0259n b\u0259rob\u0259r movard\u0259 bed\u0259n. 
\u00c7\u0259von \u015fuur iy\u0259n vicdon hese, \u0259ve ki, dey\u0259nd\u0131 m\u0131nasib\u0259t\u0259d\u0259 b\u0259n\u0259 b\u0131v\u0259 r\u0259ftor kard\u0259ninin.", + "metadata": { + "languages": [ + "tur" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "UncategorizedText", + "element_id": "615dde6386c8f1b795ccd07901216ce7", + "text": "Tamang, Eastern \u092e\u094d\u0939\u094b\u0915\u094d\u0915\u094b\u0928 (\u0917\u094b\u0926\u094b\u092a) \u0928\u094b\u0928 \u092e\u094d\u0939\u0940\u092e \u0915\u0947\u092a\u093e\u0928\u094d\u0939\u093e\u092a\u093e \u0939\u0947\u0928\u094d\u091b\u0947 \u0928\u0941\u0928 \u0939\u093e\u0919\u092a\u093e\u0919\u0935\u093e (\u0938\u094d\u0935\u0924\u0928\u094d\u0924\u094d\u0930) \u092f\u093e\u0919\u0935\u093e \u0939\u0940\u0928\u094d\u0928\u093e \u0964 \u0925\u0947 \u092e\u094d\u0939\u094b\u0915\u094d\u0915\u094b\u0928\u0932\u093e (\u0917\u094b\u0926\u094b\u092a\u0932\u093e) \u091a\u094d\u092f\u094b\u091a\u094d\u092f\u094b \u092f\u093e\u0919\u0924\u093e\u092e \u0925\u0947\u0928 \u092e\u0939\u0924\u094d\u0935 \u092e\u0941\u0932\u093e \u0964 \u0925\u0947\u0928\u0940\u0915\u093e\u0926\u0947\u0930\u0940 \u0938\u0947\u092e\u092c\u093e\u0919 (\u0935\u093f\u091a\u093e\u0930 \u0936\u0915\u094d\u0924\u093f) \u0926\u0947\u0928 \u0925\u0941-\u0938\u0947\u092e\u0938\u093e\u0919 \u092e\u0941\u092c\u093e\u0938\u0947 \u0925\u0947\u0928\u0940\u091c\u0941\u0917\u0941\u0938\u0947 \u0939\u094d\u0930\u093e\u0919\u0928\u094d\u0939\u093e\u0919\u0930\u0940 \u0928\u0941\u0928 \u0925\u0947\u0924\u094d\u092e\u093e\u0932\u093e \u0938\u0947\u092e\u0932\u0947\u0919\u092e\u094b\u0917\u094d\u092f\u093e\u092e\u094d\u0938\u0947 (\u092d\u0935\u0928\u093e\u092c\u093e\u091f) \u0917\u094d\u092f\u0947 \u0932\u093e\u0924\u094b\u092c\u093e\u0928 
\u092e\u0941\u0932\u093e \u0964", + "metadata": { + "languages": [ + "nep" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "f484ee723443631e755f61ec59737260", + "text": "Tamazight, Central Atlas Imdanen, akken ma llan ttlalen d ilelliyen msawan di lh\u0323wer\u0323ma d yizerfan- ghur sen tamsakwit d l\u00e2quel u yessefk ad-tili tegmatt gar asen.", + "metadata": { + "languages": [ + "tur", + "nld" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "UncategorizedText", + "element_id": "4fa699fe9b09ce455b4b7a0eceac23a4", + "text": "Tamazight, Central Atlas (Tifinagh) \u2d49\u2d4e\u2d37\u2d30\u2d4f\u2d3b\u2d4f, \u2d30\u2d3d\u2d3d\u2d3b\u2d4f \u2d4e\u2d30 \u2d4d\u2d4d\u2d30\u2d4f \u2d5c\u2d5c\u2d4d\u2d30\u2d4d\u2d3b\u2d4f \u2d37 \u2d49\u2d4d\u2d3b\u2d4d\u2d4d\u2d49\u2d62\u2d3b\u2d4f \u2d4e\u2d59\u2d30\u2d61\u2d30\u2d4f \u2d37\u2d49 \u2d4d\u2d43\u2d61\u2d3b\u2d55\u2d4e\u2d30 \u2d37 \u2d62\u2d49\u2d63\u2d3b\u2d54\u2d3c\u2d30\u2d4f-\u2d56\u2d53\u2d54 \u2d59\u2d3b\u2d4f \u2d5c\u2d30\u2d4e\u2d59\u2d30\u2d3d\u2d61\u2d49\u2d5c \u2d37 \u2d4d\u2d30\u2d47\u2d53\u2d3b\u2d4d \u2d53 \u2d62\u2d3b\u2d59\u2d59\u2d3b\u2d3c\u2d3d \u2d30\u2d37-\u2d5c\u2d49\u2d4d\u2d49 \u2d5c\u2d3b\u2d33\u2d4e\u2d30\u2d5c\u2d5c \u2d33\u2d30\u2d54 \u2d30\u2d59\u2d3b\u2d4f.", + "metadata": { + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + 
"type": "UncategorizedText", + "element_id": "c36059cd99076234366c10f07f278260", + "text": "Tamazight, Standard Morocan \u2d30\u2d54 \u2d37 \u2d5c\u2d5c\u2d4d\u2d30\u2d4d\u2d30\u2d4f \u2d4e\u2d49\u2d37\u2d37\u2d4f \u2d33\u2d30\u2d4f \u2d49\u2d4d\u2d3b\u2d4d\u2d4d\u2d49\u2d5c\u2d4f \u2d4e\u2d33\u2d30\u2d37\u2d37\u2d30\u2d4f \u2d56 \u2d61\u2d30\u2d37\u2d37\u2d53\u2d54 \u2d37 \u2d49\u2d63\u2d54\u2d3c\u2d30\u2d4f, \u2d62\u2d49\u2d4d\u2d49 \u2d30\u2d3d\u2d6f \u2d37\u2d30\u2d54\u2d59\u2d4f \u2d53\u2d4f\u2d4d\u2d4d\u2d49 \u2d37 \u2d53\u2d3c\u2d54\u2d30\u2d3d, \u2d49\u2d4d\u2d4d\u2d30 \u2d3c\u2d4d\u2d4d\u2d30 \u2d59\u2d4f \u2d30\u2d37 \u2d5c\u2d5c\u2d4e\u2d62\u2d30\u2d61\u2d30\u2d59\u2d4f \u2d4f\u2d33\u2d54\u2d30\u2d5c\u2d59\u2d4f \u2d59 \u2d5c\u2d30\u2d33\u2d4e\u2d30\u2d5c.", + "metadata": { + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "703b672337c499aededf6f6696d6522f", + "text": "Tamil \u0bae\u0ba9\u0bbf\u0ba4\u0baa\u0bcd \u0baa\u0bbf\u0bb1\u0bbf\u0bb5\u0bbf\u0baf\u0bbf\u0ba9\u0bb0\u0bcd \u0b9a\u0b95\u0bb2\u0bb0\u0bc1\u0bae\u0bcd \u0b9a\u0bc1\u0ba4\u0ba8\u0bcd\u0ba4\u0bbf\u0bb0\u0bae\u0bbe\u0b95\u0bb5\u0bc7 \u0baa\u0bbf\u0bb1\u0b95\u0bcd\u0b95\u0bbf\u0ba9\u0bcd\u0bb1\u0ba9\u0bb0\u0bcd; \u0b85\u0bb5\u0bb0\u0bcd\u0b95\u0bb3\u0bcd \u0bae\u0ba4\u0bbf\u0baa\u0bcd\u0baa\u0bbf\u0bb2\u0bc1\u0bae\u0bcd, \u0b89\u0bb0\u0bbf\u0bae\u0bc8\u0b95\u0bb3\u0bbf\u0bb2\u0bc1\u0bae\u0bcd \u0b9a\u0bae\u0bae\u0bbe\u0ba9\u0bb5\u0bb0\u0bcd\u0b95\u0bb3\u0bcd, \u0b85\u0bb5\u0bb0\u0bcd\u0b95\u0bb3\u0bcd \u0ba8\u0bbf\u0baf\u0bbe\u0baf\u0ba4\u0bcd\u0ba4\u0bc8\u0baf\u0bc1\u0bae\u0bcd \u0bae\u0ba9\u0b9a\u0bcd\u0b9a\u0bbe\u0b9f\u0bcd\u0b9a\u0bbf\u0baf\u0bc8\u0baf\u0bc1\u0bae\u0bcd \u0b87\u0baf\u0bb1\u0bcd\u0baa\u0ba3\u0bcd\u0baa\u0bbe\u0b95\u0baa\u0bcd 
\u0baa\u0bc6\u0bb1\u0bcd\u0bb1\u0bb5\u0bb0\u0bcd\u0b95\u0bb3\u0bcd. \u0b85\u0bb5\u0bb0\u0bcd\u0b95\u0bb3\u0bcd \u0b92\u0bb0\u0bc1\u0bb5\u0bb0\u0bc1\u0b9f\u0ba9\u0bca\u0bb0\u0bc1\u0bb5\u0bb0\u0bcd \u0b9a\u0b95\u0bcb\u0ba4\u0bb0 \u0b89\u0ba3\u0bb0\u0bcd\u0bb5\u0bc1\u0baa\u0bcd \u0baa\u0bbe\u0b99\u0bcd\u0b95\u0bbf\u0bb2\u0bcd \u0ba8\u0b9f\u0ba8\u0bcd\u0ba4\u0bc1\u0b95\u0bca\u0bb3\u0bcd\u0bb3\u0bb2\u0bcd \u0bb5\u0bc7\u0ba3\u0bcd\u0b9f\u0bc1\u0bae\u0bcd.", + "metadata": { + "languages": [ + "tam" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "cd3e1810510aee192781e40eae1b0ddc", + "text": "Tamil (Sri Lanka) \u0bae\u0ba9\u0bbf\u0ba4\u0baa\u0bcd \u0baa\u0bbf\u0bb1\u0bbf\u0bb5\u0bbf\u0baf\u0bbf\u0ba9\u0bb0\u0bcd \u0b9a\u0b95\u0bb2\u0bb0\u0bc1\u0bae\u0bcd \u0b9a\u0bc1\u0ba4\u0ba8\u0bcd\u0ba4\u0bbf\u0bb0\u0bae\u0bbe\u0b95\u0bb5\u0bc7 \u0baa\u0bbf\u0bb1\u0b95\u0bcd\u0b95\u0bbf\u0ba9\u0bcd\u0bb1\u0ba9\u0bb0\u0bcd; \u0b85\u0bb5\u0bb0\u0bcd\u0b95\u0bb3\u0bcd \u0bae\u0ba4\u0bbf\u0baa\u0bcd\u0baa\u0bbf\u0bb2\u0bc1\u0bae\u0bcd, \u0b89\u0bb0\u0bbf\u0bae\u0bc8\u0b95\u0bb3\u0bbf\u0bb2\u0bc1\u0bae\u0bcd \u0b9a\u0bae\u0bae\u0bbe\u0ba9\u0bb5\u0bb0\u0bcd\u0b95\u0bb3\u0bcd, \u0b85\u0bb5\u0bb0\u0bcd\u0b95\u0bb3\u0bcd \u0ba8\u0bbf\u0baf\u0bbe\u0baf\u0ba4\u0bcd\u0ba4\u0bc8\u0baf\u0bc1\u0bae\u0bcd \u0bae\u0ba9\u0b9a\u0bcd\u0b9a\u0bbe\u0b9f\u0bcd\u0b9a\u0bbf\u0baf\u0bc8\u0baf\u0bc1\u0bae\u0bcd \u0b87\u0baf\u0bb1\u0bcd\u0baa\u0ba3\u0bcd\u0baa\u0bbe\u0b95\u0baa\u0bcd \u0baa\u0bc6\u0bb1\u0bcd\u0bb1\u0bb5\u0bb0\u0bcd\u0b95\u0bb3\u0bcd. 
\u0b85\u0bb5\u0bb0\u0bcd\u0b95\u0bb3\u0bcd \u0b92\u0bb0\u0bc1\u0bb5\u0bb0\u0bc1\u0b9f\u0ba9\u0bca\u0bb0\u0bc1\u0bb5\u0bb0\u0bcd \u0b9a\u0b95\u0bcb\u0ba4\u0bb0 \u0b89\u0ba3\u0bb0\u0bcd\u0bb5\u0bc1\u0baa\u0bcd \u0baa\u0bbe\u0b99\u0bcd\u0b95\u0bbf\u0bb2\u0bcd \u0ba8\u0b9f\u0ba8\u0bcd\u0ba4\u0bc1\u0b95\u0bca\u0bb3\u0bcd\u0bb3\u0bb2\u0bcd \u0bb5\u0bc7\u0ba3\u0bcd\u0b9f\u0bc1\u0bae\u0bcd.", + "metadata": { + "languages": [ + "tam" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "9e55ede50aefd9018f64126e5d20a259", + "text": "Tatar \u0411\u0430\u0440\u043b\u044b\u043a \u043a\u0435\u0448\u0435\u043b\u04d9\u0440 \u0434\u04d9 \u0430\u0437\u0430\u0442 \u04bb\u04d9\u043c \u04af\u0437 \u0430\u0431\u0440\u0443\u0439\u043b\u0430\u0440\u044b \u04bb\u04d9\u043c \u0445\u043e\u043a\u0443\u043a\u043b\u0430\u0440\u044b \u044f\u0433\u044b\u043d\u043d\u0430\u043d \u0442\u0438\u04a3 \u0431\u0443\u043b\u044b\u043f \u0442\u0443\u0430\u043b\u0430\u0440. 
\u0410\u043b\u0430\u0440\u0433\u0430 \u0430\u043a\u044b\u043b \u04bb\u04d9\u043c \u0432\u04e9\u0497\u0434\u0430\u043d \u0431\u0438\u0440\u0435\u043b\u0433\u04d9\u043d \u04bb\u04d9\u043c \u0431\u0435\u0440-\u0431\u0435\u0440\u0441\u0435\u043d\u04d9 \u043a\u0430\u0440\u0430\u0442\u0430 \u0442\u0443\u0433\u0430\u043d\u0430\u0440\u0447\u0430 [\u0442\u0443\u0433\u0430\u043d\u043d\u0430\u0440\u0447\u0430] \u043c\u04e9\u043d\u04d9\u0441\u04d9\u0431\u04d9\u0442\u0442\u04d9 \u0431\u0443\u043b\u044b\u0440\u0433\u0430 \u0442\u0438\u0435\u0448\u043b\u04d9\u0440.", + "metadata": { + "languages": [ + "rus" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "ca7b2ef61ad3e52b7b7873feb9ba85c1", + "text": "Telugu \u0c2a\u0c4d\u0c30\u0c24\u0c3f\u0c2a\u0c24\u0c4d\u0c24\u0c3f\u0c38\u0c4d\u0c35\u0c24\u0c4d\u0c35\u0c2e\u0c41\u0c32 \u0c35\u0c3f\u0c37\u0c2f\u0c2e\u0c41\u0c28 \u0c2e\u0c3e\u0c28\u0c35\u0c41\u0c32\u0c46\u0c32\u0c4d\u0c32\u0c30\u0c41\u0c28\u0c41 \u0c1c\u0c28\u0c4d\u0c2e\u0c24\u0c03 \u0c38\u0c4d\u0c35\u0c24\u0c02\u0c24\u0c4d\u0c30\u0c41\u0c32\u0c41\u0c28\u0c41 \u0c38\u0c2e\u0c3e\u0c28\u0c41\u0c32\u0c41\u0c28\u0c41 \u0c28\u0c17\u0c41\u0c26\u0c41\u0c30\u0c41. 
\u0c35\u0c3e\u0c30\u0c41 \u0c35\u0c3f\u0c35\u0c47\u0c1a\u0c28-\u0c05\u0c02\u0c24\u0c03\u0c15\u0c30\u0c23 \u0c38\u0c02\u0c2a\u0c28\u0c4d\u0c28\u0c41\u0c32\u0c17\u0c41\u0c1f\u0c1a\u0c47 \u0c2a\u0c30\u0c38\u0c4d\u0c2a\u0c30\u0c2e\u0c41 \u0c2d\u0c4d\u0c30\u0c3e\u0c24\u0c43\u0c2d\u0c3e\u0c35\u0c2e\u0c41\u0c24\u0c4b \u0c35\u0c30\u0c4d\u0c24\u0c3f\u0c02\u0c2a\u0c35\u0c32\u0c2f\u0c41\u0c28\u0c41.", + "metadata": { + "languages": [ + "tel" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "8947e9ec5ba76eabce3e2d1e59437be7", + "text": "Tem B\u00e1nl\u028ar\u028a\u0301\u028a \u0269r\u028a\u0301 b\u00e1a ween\u00ed na kez\u00e9\u0144b\u00ed\u00eddi g\u025b b\u0269ka b\u025bd\u025b\u0301\u025b \u0256\u0254\u0254z\u0269\u0301t\u0269 na y\u00edkow\u00e1 k\u025bg\u025b\u0301\u025b \u0256\u00e9y\u00ed-\u0256\u00e9y\u00ed g\u025b. B\u0254w\u025bn\u00e1 laak\u00e1r\u0269 na \u0269r\u028a\u0301t\u0269 b\u0269ka b\u0269\u0269b\u0254\u0301\u0254\u0301z\u0269 b\u0254c\u0254\u0254n\u00e1 \u0256am\u00e1 koob\u00edre c\u0254w\u028ar\u025b.", + "metadata": { + "languages": [ + "ces" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "bbf40bd7dea22eb8d81c9b52157a9d78", + "text": "Tetun Ema hotu hotu moris hanesan ho dignidade ho direitu. 
Sira hotu iha hanoin, konsiensia n'e duni tenki hare malu hanesan espiritu maun-alin.", + "metadata": { + "languages": [ + "fin", + "ind", + "por" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "236b9fa6cdadaf909a113daabd6db99e", + "text": "Tetun Dili Ema tomak moris hanesan, ema tomak hanesan, iha direitu hanesan. Ema tomak iha otak ho neon, hotu-hotu sei buka moris hanesan maun ho alin.", + "metadata": { + "languages": [ + "ind", + "fin" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "4db44873f89a9eec246259109ee43eb2", + "text": "Thai", + "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "70fb4fd148b0adc870bad4cf3a004e9e", + "text": "\u0e21\u0e19\u0e38\u0e29\u0e22\u0e4c\u0e17\u0e31\u0e49\u0e07\u0e2b\u0e25\u0e32\u0e22\u0e40\u0e01\u0e34\u0e14\u0e21\u0e32\u0e21\u0e35\u0e2d\u0e34\u0e2a\u0e23\u0e30\u0e41\u0e25\u0e30\u0e40\u0e2a\u0e21\u0e2d\u0e20\u0e32\u0e04\u0e01\u0e31\u0e19\u0e43\u0e19\u0e40\u0e01\u0e35\u0e22\u0e23\u0e15\u0e34\u0e28\u0e31\u0e01\u0e14[\u0e40\u0e01\u0e35\u0e22\u0e23\u0e15\u0e34\u0e28\u0e31\u0e01\u0e14\u0e34\u0e4c]\u0e41\u0e25\u0e30\u0e2a\u0e34\u0e17\u0e18\u0e34 
\u0e15\u0e48\u0e32\u0e07\u0e21\u0e35\u0e40\u0e2b\u0e15\u0e38\u0e1c\u0e25\u0e41\u0e25\u0e30\u0e21\u0e42\u0e19\u0e18\u0e23\u0e23\u0e21 \u0e41\u0e25\u0e30\u0e04\u0e27\u0e23\u0e1b\u0e0f\u0e34\u0e1a\u0e31\u0e15\u0e34\u0e15\u0e48\u0e2d\u0e01\u0e31\u0e19\u0e14\u0e49\u0e27\u0e22\u0e40\u0e08\u0e15\u0e19\u0e32\u0e23\u0e21\u0e13\u0e4c\u0e41\u0e2b\u0e48\u0e07\u0e20\u0e23\u0e32\u0e14\u0e23\u0e20\u0e32\u0e1e", + "metadata": { + "languages": [ + "tha" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "841a49f9951dd2eb6b4d31049732b46a", + "text": "Thai (2)", + "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "a4b136507e5ed6666129c7a44794fd18", + "text": "\u0e21\u0e19\u0e38\u0e29\u0e22\u0e4c\u0e17\u0e31\u0e49\u0e07\u0e1b\u0e27\u0e07\u0e40\u0e01\u0e34\u0e14\u0e21\u0e32\u0e21\u0e35\u0e2d\u0e34\u0e2a\u0e23\u0e30\u0e41\u0e25\u0e30\u0e40\u0e2a\u0e21\u0e2d\u0e20\u0e32\u0e04\u0e01\u0e31\u0e19\u0e43\u0e19\u0e28\u0e31\u0e01\u0e14\u0e34\u0e4c\u0e28\u0e23\u0e35\u0e41\u0e25\u0e30\u0e2a\u0e34\u0e17\u0e18\u0e34 \u0e15\u0e48\u0e32\u0e07\u0e43\u0e19\u0e15\u0e19\u0e21\u0e35\u0e40\u0e2b\u0e15\u0e38\u0e1c\u0e25\u0e41\u0e25\u0e30\u0e21\u0e42\u0e19\u0e18\u0e23\u0e23\u0e21 \u0e41\u0e25\u0e30\u0e04\u0e27\u0e23\u0e1b\u0e0f\u0e34\u0e1a\u0e31\u0e15\u0e34\u0e15\u0e48\u0e2d\u0e01\u0e31\u0e19\u0e14\u0e49\u0e27\u0e22\u0e08\u0e34\u0e15\u0e27\u0e34\u0e0d\u0e0d\u0e32\u0e13\u0e41\u0e2b\u0e48\u0e07\u0e20\u0e23\u0e32\u0e14\u0e23\u0e20\u0e32\u0e1e", + "metadata": { + "languages": [ + "tha" + ], + 
"filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "8f52798dd21c8472bda701088f7e82ca", + "text": "Themne A kom a\u014bf\u0259m ak\u0259pet b\u025b \u014ba ath\u0259n\u028cn\u025b yi r\u028cwankom. \u0186wa a\u014b ba m\u0259mari m\u0259th\u0259n\u028cn\u025b. \u0186wa a\u014b ba m\u0259fith yi t\u0259chemp. Chiya\u014b, a\u014b yi t\u0259k\u0259 gbasi a\u014bkos \u014ba\u014b m\u0254 k\u0259pa \u014ba t\u0259kom.", + "metadata": { + "languages": [ + "swa", + "ind" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "5d770ab20285b54fa9896cd9f26b3d43", + "text": "Tibetan, Central", + "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "9ff7c25da02c27eefccdaca502af53c1", + "text": "\u0f60\u0f42\u0fb2\u0f7c\u0f0b\u0f56\u0f0b\u0f58\u0f72\u0f60\u0f72\u0f0b\u0f62\u0f72\u0f42\u0f66\u0f0b\u0f62\u0f92\u0fb1\u0f74\u0f51\u0f0b\u0f61\u0f7c\u0f44\u0f66\u0f0b\u0f63\u0f0b\u0f66\u0f90\u0fb1\u0f7a\u0f66\u0f0b\u0f59\u0f58\u0f0b\u0f49\u0f72\u0f51\u0f0b\u0f53\u0f66\u0f0b\u0f46\u0f7a\u0f0b\u0f58\u0f50\u0f7c\u0f44\u0f66\u0f0b\u0f51\u0f44\u0f0c\u0f0d 
\u0f50\u0f7c\u0f56\u0f0b\u0f50\u0f44\u0f42\u0f72\u0f0b\u0f62\u0f44\u0f0b\u0f51\u0f56\u0f44\u0f0b\u0f60\u0f51\u0fb2\u0f0b\u0f58\u0f49\u0f58\u0f0b\u0f51\u0f74\u0f0b\u0f61\u0f7c\u0f51\u0f0b\u0f63\u0f0d \u0f41\u0f7c\u0f44\u0f0b\u0f5a\u0f7c\u0f62\u0f0b\u0f62\u0f44\u0f0b\u0f56\u0fb1\u0f74\u0f44\u0f0b\u0f42\u0f72\u0f0b\u0f56\u0fb3\u0f7c\u0f0b\u0f62\u0fa9\u0f63\u0f0b\u0f51\u0f44\u0f0b\u0f56\u0f66\u0f58\u0f0b\u0f5a\u0f74\u0f63\u0f0b\u0f56\u0f5f\u0f44\u0f0b\u0f54\u0f7c\u0f0b\u0f60\u0f51\u0f7c\u0f53\u0f0b\u0f54\u0f60\u0f72\u0f0b\u0f60\u0f7c\u0f66\u0f0b\u0f56\u0f56\u0f66\u0f0b\u0f40\u0fb1\u0f44\u0f0b\u0f61\u0f7c\u0f51\u0f0d \u0f51\u0f7a\u0f0b\u0f56\u0f5e\u0f72\u0f53\u0f0b\u0f55\u0f53\u0f0b\u0f5a\u0f74\u0f53\u0f0b\u0f42\u0f45\u0f72\u0f42\u0f0b\u0f42\u0f72\u0f66\u0f0b\u0f42\u0f45\u0f72\u0f42\u0f0b\u0f63\u0f0b\u0f56\u0f74\u0f0b\u0f66\u0fa4\u0f74\u0f53\u0f0b\u0f42\u0fb1\u0f72\u0f0b\u0f60\u0f51\u0f74\u0f0b\u0f64\u0f7a\u0f66\u0f0b\u0f60\u0f5b\u0f72\u0f53\u0f0b\u0f54\u0f60\u0f72\u0f0b\u0f56\u0fb1\u0f0b\u0f66\u0fa4\u0fb1\u0f7c\u0f51\u0f0b\u0f40\u0fb1\u0f44\u0f0b\u0f63\u0f42\u0f0b\u0f63\u0f7a\u0f53\u0f0b\u0f56\u0f66\u0f9f\u0f62\u0f0b\u0f51\u0f42\u0f7c\u0f66\u0f0b\u0f54\u0f0b\u0f61\u0f72\u0f53\u0f0e", + "metadata": { + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "8af88623529d7fac1f9e181cf1759b64", + "text": "Ticuna Ng\u1ebdxguma nabuxgu i du\u00fc\u0303x\u00fc\u0303g\u00fc r\u00fc gux\u00fc\u0303ma naw\u00fcxigu, r\u00fc tataxuma ya tex\u00e9 ya tog\u00fcar\u00fc yexera ix\u0129s\u1ebd. 
R\u00fc gux\u00fc\u0303ma nax\u00e3\u00e3\u1ebdg\u00fc r\u00fc ng\u1ebdmaca\u0331x r\u00fc name nix\u0129 na n\u00fcg\u00fcma\u00e3 namec\u00fcmax\u00fc\u0303 \u0129 gux\u00fc\u0303ma \u0129 du\u00fc\u0303x\u00fc\u0303g\u00fc.", + "metadata": { + "languages": [ + "tur", + "vie", + "som", + "por" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "UncategorizedText", + "element_id": "3a1e54e52c1e8f2960b9f52ba81d5b61", + "text": "Tigrigna \u1265\u1218\u1295\u1345\u122d \u12ad\u1265\u122d\u1295 \u1218\u1230\u120d\u1295 \u12a9\u120e\u121d \u1230\u1263\u1275 \u12a5\u1295\u1275\u12cd\u1208\u12f1 \u1290\u1343\u1295 \u121b\u12d5\u122a\u1295 \u12a5\u12ee\u121d\u1361\u1361 \u121d\u1235\u1275\u12cd\u12d3\u120d\u1295 \u1215\u120d\u1293\u1295 \u12dd\u1270\u12d3\u12f0\u120e\u121d \u1265\u121d\u12c3\u1296\u121d \u1295\u1215\u12f5\u1215\u12f6\u121d \u1265\u1215\u12cd\u1290\u1273\u12ca \u1218\u1295\u1348\u1235 \u12ad\u1270\u1213\u120b\u1208\u12e9 \u12a6\u1208\u12ce\u121d\u1361\u1361", + "metadata": { + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "de5f844ed144e72bc158df450d496c1f", + "text": "Tiv I mar maor ken kpan ga, nan ngu a icivir man mbamkpeiyol cii. 
I na nan mhen man ishima i kaven kwagh; nahan gba keng u nana tema a orgen ken mtem u angbian a angbian.", + "metadata": { + "languages": [ + "tgl", + "ind" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "dce66eb1491ee0e05782cd7b4060bdf1", + "text": "Toba 'Enauac na naaxat shi\u1ef9axauapi na mayipi huesochiguii qataq 'eeta'a't da l'amaqchic qataq da 'enec qataq \u1ef9ataqta \u1ef9a\u1ef9ate'n naua lataxaco qataq nua no'o'n nvil\u1ef9axaco, qaq \u1ef9oqo'oyi iuen da i 'oonolec \u1ef9ataqta itauan ichoxoden ca l\u1ef9a", + "metadata": { + "languages": [ + "som", + "cat" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "d4b675c94f0bd52682c828f5060488a5", + "text": "Tojolabal Spetsanal ja swinkil ja lu\u2019um k\u2019inali junxta wax jul schonjel, sok ja sijpanub\u2019ali, ja yuj ojni b\u2019ob\u2019 sk\u2019u\u2019luk ja jas sk\u2019ana-i ja b\u2019as lekilali, ja yuj ja ay sk\u2019ujoli sok ay spensari t\u2019ilan oj yilsb\u2019aje lek sok ja smoj jumasa.", + "metadata": { + "languages": [ + "slv", + "hrv", + "est" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "baa6d468177ac269426d4a3d0b02bb93", + "text": "Tok Pisin Yumi olgeta mama karim umi long stap fri na wankain long wei yumi lukim i gutpela na strepela tru. 
Uumi olgeta igat ting ting bilong wanem samting I rait na rong na mipela olgeta I mas mekim gutpela pasin long ol narapela long tingting bilong brata susa.", + "metadata": { + "languages": [ + "tgl", + "ind" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "a264ccf464d28bf6d1ca7b13ae28ec33", + "text": "Tonga Bantu boonse balazyalwa kabaangulukide alimwi kabeelene alimwi akwaanguluka kucita zyobayanda. Balazyalwa amaanu akuyeeya, aakusala alimwi beelede kulanga bambi mbuli banabokwabo.", + "metadata": { + "languages": [ + "swa", + "som", + "tgl" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "11c1506a0e4eb0a3616787ebc32828da", + "text": "Tongan Ko e kotoa \u2018o ha\u2019a tangata \u2018oku fanau\u2019i mai \u2018oku tau\u2019ataina pea tatau \u2018i he ngeia mo e ngaahi totonu. 
Na\u2019e fakanaunau\u2019i kinautolu \u2018aki \u2018a e \u2018atamai mo e konisenisi pea \u2018oku totonu ke nau feohi \u2018i he laumalie \u2018o e nofo fakatautehina.", + "metadata": { + "languages": [ + "swa", + "ind", + "fin" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "dab04d4ff36fa1a54202c63fe319d7f5", + "text": "Totonac, Papantla Wakg lakch'ixkuwin talakgawan nak ka'unin niti ka'akgch'apawalinit nachuna wakg takg'alhi ixtamaxanatkan chu tu kaminini, je'e wanp'utun xlakata wakg talakpuwanan, talalakgk'atsan liwakg, talakask'ini xlakata wakg natalamakgtaya.", + "metadata": { + "languages": [ + "swa", + "tgl" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "49ac7c418a1a33c64e2c3e228669acea", + "text": "Tsonga (Mozambique) Vanhu hin'kwavu va psaliwili na va khululek\u00ecle, funthsi va fana hi lisima ni tinfaneno. V\u00e0 psaliwili ni nyiko ya ku pimisa ni ku yehleketa; hi kolahu, va fanela ku hanya hi moya wa umb\u00eclu ni unghani.", + "metadata": { + "languages": [ + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "d67ec59f83da37904b828e20b79c6f98", + "text": "Tsonga (Zimbabwe) Vanhu hinkwavo va tswariwa va tshunxekile naswona va ringanile eka tifanelo na xindzhuti. 
Va havaxerile miehleketo na tshiriti kumbe ku tiva xo biha ni xta kahle nakambe va fanele va kombana moya wa vukwavo.", + "metadata": { + "languages": [ + "swa", + "est" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "7d60ead79ecb4818a244fb15daa7b691", + "text": "Tswana Batho botlhe ba tsetswe ba gololosegile le go lekalekana ka seriti le ditshwanelo. Ba abetswe go akanya le maikutlo, mme ba tshwanetse go direlana ka mowa wa bokaulengwe.", + "metadata": { + "languages": [ + "swa", + "afr" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "3ecfed863a5eed35ac7bcdc4f1ebcf6d", + "text": "Turkish B\u00fct\u00fcn insanlar h\u00fcr, haysiyet ve haklar bak\u0131m\u0131ndan e\u015fit do\u011farlar. 
Ak\u0131l ve vicdana sahiptirler ve birbirlerine kar\u015f\u0131 karde\u015flik zihniyeti ile hareket etmelidirler.", + "metadata": { + "languages": [ + "tur" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "ec6b4429d4b16c9725f0f1420314a928", + "text": "Turkmen (Cyrillic) \u0425\u0435\u043c\u043c\u0435 \u0430\u0434\u0430\u043c\u043b\u0430\u0440 \u04e9\u0437 \u043c\u0435\u0440\u0442\u0435\u0431\u0435\u0441\u0438 \u0432\u0435 \u0445\u0443\u043a\u0443\u043a\u043b\u0430\u0440\u044b \u0431\u043e\u044e\u043d\u0447\u0430 \u0434\u0435\u04a3 \u044f\u0433\u0434\u0430\u0439\u0434\u0430 \u0434\u04af\u043d\u0439\u04d9 \u0438\u043d\u0439\u04d9\u0440\u043b\u0435\u0440. \u041e\u043b\u0430\u0440\u0430 \u0430\u04a3 \u0445\u0435\u043c \u0432\u044b\u0497\u0434\u0430\u043d \u0431\u0435\u0440\u043b\u0435\u043d\u0434\u0438\u0440 \u0432\u0435 \u043e\u043b\u0430\u0440 \u0431\u0438\u0440\u2010\u0431\u0438\u0440\u043b\u0435\u0440\u0438 \u0431\u0438\u043b\u0435\u043d \u0434\u043e\u0433\u0430\u043d\u043b\u044b\u043a \u0440\u0443\u0445\u0443\u043d\u0434\u0430\u043a\u044b \u0433\u0430\u0440\u0430\u0439\u044b\u0448\u0434\u0430 \u0431\u043e\u043b\u043c\u0430\u043b\u044b\u0434\u044b\u0440\u043b\u0430\u0440.", + "metadata": { + "languages": [ + "rus" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "27683edb29bca811bea3008052c0fc9f", + "text": "Turkmen (Latin) Adamlary\u0148 hemmesi azat dogul\u00fdarlar we \u00f6z mertebesi hem\u2010de hukuklary bo\u00fdun\u00e7a ilkiba\u015fdan de\u0148dirler. 
Olara ozal\u2010ba\u015fdan a\u0148, ynsap berlendir we biri\u2010birine \u00f6zara doganlyk ruhunda \u00e7emele\u015fmek olary\u0148 \u00fdara\u015fygydyr.", + "metadata": { + "languages": [ + "tur" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "6b9f05c9e0fdf0e6de36b54f1c82f5d0", + "text": "Tuva \u0411\u04af\u0433\u04af \u043a\u0438\u0436\u0438\u043b\u0435\u0440 \u0445\u043e\u0441\u0442\u0443\u0433 \u0431\u0430\u0437\u0430 \u043c\u04e9\u0437\u04af\u0437\u04af \u0431\u043e\u043b\u0433\u0430\u0448 \u044d\u0440\u0433\u0435\u043b\u0435\u0440\u0438 \u0434\u0435\u04a3 \u043a\u044b\u043b\u0434\u044b\u0440 \u0442\u04e9\u0440\u04af\u0442\u0442\u04af\u043d\u0435\u0440. \u041e\u043b\u0430\u0440\u0433\u0430 \u0443\u0433\u0430\u0430\u043d\u0441\u0430\u0440\u044b\u044b\u043b \u0431\u043e\u043b\u0433\u0430\u0448 \u0430\u0440\u044b\u043d-\u043d\u04af\u04af\u0440 \u0431\u0435\u0440\u0434\u0438\u043d\u0433\u0435\u043d \u0431\u043e\u043b\u0443\u0440 \u0431\u043e\u043b\u0433\u0430\u0448 \u043e\u043b\u0430\u0440 \u0431\u043e\u0442-\u0431\u043e\u0442\u0442\u0430\u0440\u044b\u043d\u0433\u0430 \u0430\u043a\u044b-\u0434\u0443\u04a3\u043c\u0430\u043b\u044b\u0448\u043a\u044b \u0445\u0430\u043c\u0430\u0430\u0440\u044b\u043b\u0433\u0430\u043d\u044b \u043a\u04e9\u0440\u0433\u04af\u0437\u0435\u0440 \u0443\u0436\u0443\u0440\u043b\u0443\u0433.", + "metadata": { + "languages": [ + "rus" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "527f7d8b2d19b7c6c3f2fadc70ada262", + "text": "Twi (Akuapem) 
W\u0254awo adesamma nyinaa s\u025b nnipa a w\u0254w\u0254 ahofadi. W\u0254n nyinaa w\u0254 nidi ne ky\u025bfa koro. W\u0254w\u0254 adwene ne ahonim, na \u025bs\u025b s\u025b wobu w\u0254n ho w\u0254n ho s\u025b anuanom.", + "metadata": { + "languages": [ + "swa", + "som" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "aefbdde1da2ecc73208751b3c330bb3e", + "text": "Twi (Asante) Nnipa nyinaa y\u025b p\u025b. Na w\u0254de adwene ne nyansa na ab\u0254 obiara. \u0190no nti, \u025bs\u025b s\u025b obiara d\u0254 ne y\u0254nko, bu ne y\u0254nko, di ne y\u0254nko ni.", + "metadata": { + "languages": [ + "swa", + "som" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "4b0bd8eaae3f12feed9188c010027eb7", + "text": "Tzeltal, Oxchuc Spisil winiketik te ya xbejk\u00b4ajik ta k\u00b4inalil ay jrerechotik, mayuk mach\u00b4a chukul ya xbejka, ya jnatik stojol te jpisiltik ay snopibal sok sbijil joltik, ja\u00b4 me k\u00b4ux ya kaibatik ta jujun tul.", + "metadata": { + "languages": [ + "ind", + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "3a1d8b7b6302ae4de3c1c05a5c4f8fc7", + "text": "Tzotzil (Chamula) Skotol vinik o ants ta spejel balumile k\u2019olem x-hayan i ko\u2019ol ta sch\u2019ulal i sderechoetik i, skotol k\u2019ux-elan 
oyike oy srasonik y slekilalik, sventa skuxijik lekn\u00f3o ta ju jun ju ju vo.", + "metadata": { + "languages": [ + "hrv", + "slv", + "ind" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "9c8ce1a1d4b031909f2b8d5c31bc3084", + "text": "Uduk Aris \u2019kwaniny\u2019ceshi \u2019baar mo dho\u2019thkunu \u2019ba\u1e35any mo dhali mmomiiya \u1e6fu\u2019c imon\u1e6fal \u2019de/ mo dhali mii ma \u1e35ar/e mo. Uni mini ta gi gwo mo dhali mii mo dhali uni mini mii ka karambuye/ \u2019kup\u0331 ki cin tiya mo e shi/in mo dhali mii kun tanu ikam mo.", + "metadata": { + "languages": [ + "swa", + "som" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "35ad852b028b17863397cd23a741e776", + "text": "Ukrainian \u0412\u0441\u0456 \u043b\u044e\u0434\u0438 \u043d\u0430\u0440\u043e\u0434\u0436\u0443\u044e\u0442\u044c\u0441\u044f \u0432\u0456\u043b\u044c\u043d\u0438\u043c\u0438 \u0456 \u0440\u0456\u0432\u043d\u0438\u043c\u0438 \u0443 \u0441\u0432\u043e\u0457\u0439 \u0433\u0456\u0434\u043d\u043e\u0441\u0442\u0456 \u0442\u0430 \u043f\u0440\u0430\u0432\u0430\u0445. 
\u0412\u043e\u043d\u0438 \u043d\u0430\u0434\u0456\u043b\u0435\u043d\u0456 \u0440\u043e\u0437\u0443\u043c\u043e\u043c \u0456 \u0441\u043e\u0432\u0456\u0441\u0442\u044e \u0456 \u043f\u043e\u0432\u0438\u043d\u043d\u0456 \u0434\u0456\u044f\u0442\u0438 \u0443 \u0432\u0456\u0434\u043d\u043e\u0448\u0435\u043d\u043d\u0456 \u043e\u0434\u0438\u043d \u0434\u043e \u043e\u0434\u043d\u043e\u0433\u043e \u0432 \u0434\u0443\u0441\u0456 \u0431\u0440\u0430\u0442\u0435\u0440\u0441\u0442\u0432\u0430.", + "metadata": { + "languages": [ + "ukr" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "2da70f2c0e7850d3cb64606cb0479fc9", + "text": "Umbundu Omanu vosi vacitiwa valipwa kwenda valisoka kovina vyosikwenda komoko. Ovo vakwete esunga kwenda, kwenda olondunge kwenje ovo vat\u00eala okuliteywila kuvamwe kwenda vakwavo vesokolwilo lyocisola.", + "metadata": { + "languages": [ + "swa", + "hrv", + "afr" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "64edd93c5fb9067ee95bf26fb4495608", + "text": "Umbundu (011) Omanu vosi kilu lieve va citiwa lonjila yimosi leyovo limosi, lomoko yimosi kuenda unu umosi, kuenje momo vosi va kuete olondunge, va sesamela okulisumbila pokati ndavamanji.", + "metadata": { + "languages": [ + "hrv", + "fin" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": 
"NarrativeText", + "element_id": "1124c31d72fecef43b62a1802dfaa8b8", + "text": "Urarina Ita rijiicha itolere cacha. Aihana jaun, ita belaain, naojoain neuruhine laurilaurichuru nenacaauru aina itolere cachaauru.", + "metadata": { + "languages": [ + "swa", + "ron", + "fin" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "UncategorizedText", + "element_id": "17e2b5b5c80c984c98843bbed39884c4", + "text": "Urdu \u062a\u0645\u0627\u0645 \u0627\u0646\u0633\u0627\u0646 \u0622\u0632\u0627\u062f \u0627\u0648\u0631 \u062d\u0642\u0648\u0642 \u0648 \u0639\u0632\u062a \u06a9\u06d2 \u0627\u0639\u062a\u0628\u0627\u0631 \u0633\u06d2 \u0628\u0631\u0627\u0628\u0631 \u067e\u06cc\u062f\u0627 \u06c1\u0648\u0626\u06d2 \u06c1\u06cc\u06ba\u06d4 \u0627\u0646\u06c1\u06cc\u06ba \u0636\u0645\u06cc\u0631 \u0627\u0648\u0631 \u0639\u0642\u0644 \u0648\u062f\u06cc\u0639\u062a \u06c1\u0648\u0626\u06cc \u06c1\u06d2\u06d4 \u0627\u0633 \u0644\u0626\u06d2 \u0627\u0646\u06c1\u06cc\u06ba \u0627\u06cc\u06a9 \u062f\u0648\u0633\u0631\u06d2 \u06a9\u06d2 \u0633\u0627\u062a\u06be \u0628\u06be\u0627\u0626\u06cc \u0686\u0627\u0631\u06d2 \u06a9\u0627 \u0633\u0644\u0648\u06a9 \u06a9\u0631\u0646\u0627 \u0686\u0627\u06c1\u06cc\u0626\u06d2\u06d4", + "metadata": { + "languages": [ + "urd" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "UncategorizedText", + "element_id": "64062747e4a49e81a0ff7fe76c935f92", + "text": "Urdu (2) \u062a\u0645\u0627\u0645 \u0627\u0646\u0633\u0627\u0646 \u0622\u0632\u0627\u062f \u0627\u0648\u0631 \u062d\u0642\u0648\u0642 \u0648 \u0639\u0632\u062a \u06a9\u06d2 
\u0627\u0639\u062a\u0628\u0627\u0631 \u0633\u06d2 \u0628\u0631\u0627\u0628\u0631 \u067e\u06cc\u062f\u0627 \u06c1\u0648\u0626\u06d2 \u06c1\u06cc\u06ba\u06d4 \u0627\u0646\u06c1\u06cc\u06ba \u0636\u0645\u06cc\u0631 \u0627\u0648\u0631 \u0639\u0642\u0644 \u0648\u062f\u06cc\u0639\u062a \u06c1\u0648\u0626\u06cc \u06c1\u06d2\u06d4 \u0627\u0633 \u0644\u06cc\u06d2 \u0627\u0646\u06c1\u06cc\u06ba \u0627\u06cc\u06a9 \u062f\u0648\u0633\u0631\u06d2 \u06a9\u06d2 \u0633\u0627\u062a\u06be \u0628\u06be\u0627\u0626\u06cc \u0686\u0627\u0631\u06d2 \u06a9\u0627 \u0633\u0644\u0648\u06a9 \u06a9\u0631\u0646\u0627 \u0686\u0627\u06c1\u06cc\u06d2\u06d4", + "metadata": { + "languages": [ + "urd" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "c0f369076ccc7b4f6949b46f78e9c721", + "text": "Uyghur (Arabic) \u06be\u06d5\u0645\u0645\u06d5 \u0626\u0627\u062f\u06d5\u0645 \u0632\u0627\u0646\u0649\u062f\u0649\u0646\u0644\u0627 \u0626\u06d5\u0631\u0643\u0649\u0646\u060c \u0626\u0649\u0632\u0632\u06d5\u062a-\u06be\u06c6\u0631\u0645\u06d5\u062a \u06cb\u06d5 \u06be\u0648\u0642\u06c7\u0642\u062a\u0627 \u0628\u0627\u067e\u0628\u0627\u0631\u0627\u06cb\u06d5\u0631 \u0628\u0648\u0644\u06c7\u067e \u062a\u06c7\u063a\u06c7\u0644\u063a\u0627\u0646. 
\u0626\u06c7\u0644\u0627\u0631 \u0626\u06d5\u0642\u0649\u0644\u063a\u06d5 \u06cb\u06d5 \u06cb\u0649\u062c\u062f\u0627\u0646\u063a\u0627 \u0626\u0649\u06af\u06d5 \u06be\u06d5\u0645\u062f\u06d5 \u0628\u0649\u0631-\u0628\u0649\u0631\u0649\u06af\u06d5 \u0642\u06d0\u0631\u0649\u0646\u062f\u0627\u0634\u0644\u0649\u0642 \u0645\u06c7\u0646\u0627\u0633\u0649\u06cb\u0649\u062a\u0649\u06af\u06d5 \u062e\u0627\u0633 \u0631\u0648\u06be \u0628\u0649\u0644\u06d5\u0646 \u0645\u0648\u0626\u0627\u0645\u0649\u0644\u06d5 \u0642\u0649\u0644\u0649\u0634\u0649 \u0643\u06d0\u0631\u06d5\u0643.", + "metadata": { + "languages": [ + "ara" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "c9695addaae400cf93180490aae4c5b8", + "text": "Uyghur (Latin) hemme adem zatidinla erkin, izzet-h\u00f6rmet we hoquqta babbarawer bolup tughulghan. 
ular eqilghe we wijdan'gha ige hemde bir-birige q\u00e9rindashliq munasiwitige xas roh bilen muamile qilishi k\u00e9rek.", + "metadata": { + "languages": [ + "nld", + "eng", + "deu", + "tur" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "cf037543ae7e29089220134bd8d9fc80", + "text": "Uzbek, Northern (Cyrillic) \u0411\u0430\u0440\u0447\u0430 \u043e\u0434\u0430\u043c\u043b\u0430\u0440 \u044d\u0440\u043a\u0438\u043d, \u049b\u0430\u0434\u0440\u2010\u049b\u0438\u043c\u043c\u0430\u0442 \u0432\u0430 \u04b3\u0443\u049b\u0443\u049b\u043b\u0430\u0440\u0434\u0430 \u0442\u0435\u043d\u0433 \u0431\u045e\u043b\u0438\u0431 \u0442\u0443\u0493\u0438\u043b\u0430\u0434\u0438\u043b\u0430\u0440. \u0423\u043b\u0430\u0440 \u0430\u049b\u043b \u0432\u0430 \u0432\u0438\u0436\u0434\u043e\u043d \u0441\u043e\u04b3\u0438\u0431\u0438\u0434\u0438\u0440\u043b\u0430\u0440 \u0432\u0430 \u0431\u0438\u0440\u2010\u0431\u0438\u0440\u043b\u0430\u0440\u0438\u0433\u0430 \u0431\u0438\u0440\u043e\u0434\u0430\u0440\u043b\u0430\u0440\u0447\u0430 \u043c\u0443\u043e\u043c\u0430\u043b\u0430 \u049b\u0438\u043b\u0438\u0448\u043b\u0430\u0440\u0438 \u0437\u0430\u0440\u0443\u0440.", + "metadata": { + "languages": [ + "mkd" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "f96f007fae71f3dbb5cf107a67339f62", + "text": "Uzbek, Northern (Latin) Barcha odamlar erkin, qadr\u2010qimmat va huquqlarda teng bo\u02bblib tug\u02bbiladilar. 
Ular aql va vijdon sohibidirlar va bir\u2010birlariga birodarlarcha muomala qilishlari zarur.", + "metadata": { + "languages": [ + "tur", + "ind" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "4309a801882998d4a87ec4393c62eb5b", + "text": "Vai \ua549\ua55c\ua56e \ua514\ua60b \ua5b8 \ua530 \ua5cb\ua60b \ua56e\ua568 \ua514\ua60b \ua5b8 \ua54e \ua549\ua5b8\ua54a \ua574\ua583 \ua543\ua524\ua602 \ua5f1, \ua549\ua5b7 \ua5ea\ua5e1 \ua53b\ua524 \ua5cf\ua5d2\ua5e1 \ua54e \ua5ea \ua549\ua5b8\ua54a \ua58f\ua54e. \ua549\ua561 \ua58f \ua5f3\ua56e\ua54a \ua5cf \ua56a \ua5d3 \ua549\ua5b7 \ua549\ua5b8 \ua558\ua55e \ua5ea. \ua58f\ua5b7 \ua549\ua5b8\ua527 \ua58f \ua5b8 \ua55a\ua54c\ua602 \ua5f7\ua524 \ua55e \ua603\ua5b7 \ua609\ua527 \ua5e0\ua5bb \ua55e \ua5b4\ua60b \ua533\ua569 \ua549\ua5b8 \ua5f3.", + "metadata": { + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "8874ff5275f95f22ade2d05b19b84596", + "text": "Venda Vhathu vho\u1e71he vha bebwa vhe na mbofholowo nahone vha tshi lingana siani \u1e3da tshirunzi na pfanelo. 
Vhathu vho\u1e71he vho \u1e4bewa mihumbulo na mvalo ngauralo vha tea u konou farana sa vhathu vhathihi.", + "metadata": { + "languages": [ + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "1b4e3e7ad00ef96ec0938e98c22ac4d7", + "text": "Venda Vhathu vho\u1e71he vha bebwa vhe na mbofholowo nahone vha tshi lingana siani \u1e3da tshirunzi na pfanelo. Vhathu vho\u1e71he vho \u1e4bewa mihumbulo na mvalo ngauralo vha tea u konou farana sa vhathu vhathihi.", + "metadata": { + "languages": [ + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "57f8d88a5300439c2e78d95d9954dd1b", + "text": "Venetian Tuti i \u00e8sari umani i nase \u0142\u00ecbari e conpanji par dinjit\u00e0 e deriti. I ze dot\u00e0i de rajon e de cosiensa e i ga da conportarse intr\u00e0 de \u0142ori co sp\u00ecrito de frade\u0142i.", + "metadata": { + "languages": [ + "ita", + "por" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "bde94a10001841ef9fad0f19311e6fa9", + "text": "Veps Kaik mehed su\u0308nduba joudajin i kohtai\u017ein, u\u0308hteji\u010d\u010din i\u010deze arvokahudes i oiktusi\u0161. 
Heile om anttud mel\u2019 i huiktusentund i heile tari\u017e ko\u017euda toine toi\u017eenke kut vel\u2019l\u2019kundad.", + "metadata": { + "languages": [ + "est", + "slv" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "c6836fc94a9a2261da5605eae88ea21f", + "text": "Vietnamese T\u00e2\u0301t ca\u0309 mo\u0323i ng\u01b0\u01a1\u0300i sinh ra \u0111\u00ea\u0300u \u0111\u01b0\u01a1\u0323c t\u01b0\u0323 do va\u0300 bi\u0300nh \u0111\u0103\u0309ng v\u00ea\u0300 nh\u00e2n ph\u00e2\u0309m va\u0300 quy\u00ea\u0300n. Mo\u0323i con ng\u01b0\u01a1\u0300i \u0111\u00ea\u0300u \u0111\u01b0\u01a1\u0323c ta\u0323o hoa\u0301 ban cho ly\u0301 tri\u0301 va\u0300 l\u01b0\u01a1ng t\u00e2m va\u0300 c\u00e2\u0300n pha\u0309i \u0111\u00f4\u0301i x\u01b0\u0309 v\u01a1\u0301i nhau trong ti\u0300nh b\u0103\u0300ng h\u01b0\u0303u.", + "metadata": { + "languages": [ + "vie" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "485844252e980bfa24ace2ee02a33f60", + "text": "Vietnamese (Han nom)", + "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "Title", + "element_id": "294055dfb0c1131395070d727e81fde6", + "text": 
"\u7562\u54ff\u6bcf\ud840\ude9b\u751f\ud841\udea2\u8abf\u5f97\u81ea\u7531\u5427\u5e73\u7b49\ud85d\uddf1\u4eba\u54c1\u5427\u6b0a\u3002\u6bcf\ud846\udd75\ud840\ude9b\u8abf\u5f97\u9020\u5316\u9812\u6731\u7406\u667a\u5427\u826f\u5fc3\u5427\u52e4\u6c9b\u5c0d\u8655\ud84a\udf72\u81ae\ud856\ude9d\u60c5\u670b\u53cb\u3002", + "metadata": { + "languages": [ + "kor", + "zho" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "4ab64de143568003ad62ca2cf3c8cda3", + "text": "Waama Yiriba na b\u00e0 sikindo dare b\u00e0 m\u025b\u025bri, da seena yirimma mii b\u00e0 ta da i n\u025bki b\u00e0 t\u0254\u0254ba.", + "metadata": { + "languages": [ + "som", + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "88700f6c9f719c0f7ad537b0fe24d46d", + "text": "Walloon Tos l\u00e8s-omes vin\u00e8t-st-\u00e5 monde l\u00eebes, \u00e8t so-l'minme p\u00eed po \u00e7ou qu'\u00e8nn'\u00e8st d'leu dignit\u00e9 \u00e8t d'leus dre\u00fbts. 
I n'sont nin fo\u00fb r\u00eazon \u00e8t-z-ont-i le\u00fb consyince po z\u00e8ls, \u00e7ou qu'\u00e8lz\u00e8s de\u00fbt miner a s'kid\u00fbre onk' po l'\u00f4te tot come d\u00e8s fr\u00e9s.", + "metadata": { + "languages": [ + "fra" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "4cc64fb61f5a74a71a90733404e04f4d", + "text": "Waorani Tomamo waomo ekame wee anamay inani tomemo kewengi beye tomamo neemompa noynga impa aye anobay impa wadani inanite wakeki beye angampa.", + "metadata": { + "languages": [ + "swa", + "tgl" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "800591b9820914f6f735094c30dd8c74", + "text": "Waray-Waray Nga an ngatanan nga mga tawo, nahimugso talwas ug katpong ha ira dignidad ug katdungan. 
Hira natawo dinhi ha tuna mayda konsensya ug isip ug kaangayan gud la nga an ira pagtagad ha tagsatagsa sugad hin magburugto.", + "metadata": { + "languages": [ + "tgl" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "25c9bb862536e9e520792ea8724608de", + "text": "Wayuu Naa wayuukana jemeishi s\u00fcp\u00fcla taashi s\u00fcma wanawa s\u00fclu'u nakua'ipa, aka m\u00fcin yaa epijainjana s\u00fcnain anajiranawaa a'in nama nap\u00fcshi.", + "metadata": { + "languages": [ + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "b4265fbb8924aeeb84569e7b2e4e3197", + "text": "Welsh Genir pawb yn rhydd ac yn gydradd \u00e2\u2019i gilydd mewn urddas a hawliau. Fe\u2019u cynysgaeddir \u00e2 rheswm a chydwybod, a dylai pawb ymddwyn y naill at y llall mewn ysbryd cymodlon.", + "metadata": { + "languages": [ + "cym" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "8799ac3c8264dbd02b24e5484e28ea2d", + "text": "Wolof Doomi aadama y\u00e9pp danuy juddu, yam ci tawfeex ci sag ak sa\u00f1-sa\u00f1. 
Nekk na it ku xam d\u00ebgg te \u00e0nd na ak xelam, te war naa j\u00ebflante ak nawleen, te teg ko ci w\u00e0llu mbokk.", + "metadata": { + "languages": [ + "ind", + "som" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "e341d3f3abdd74df58471e96e0695a4b", + "text": "Xhosa Bonke abantu bazalwa bekhululekile belingana ngesidima nangokweemfanelo. Bonke abantu banesiphiwo sesazela nesizathu sokwenza isenzo ongathanda ukuba senziwe kumzalwane wakho.", + "metadata": { + "languages": [ + "swa", + "ind" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "64fadcc858963f78ddee929df92dd08e", + "text": "Yagua Ne sarupay nijyami cumudeju darvantyamuy javatyasjiu. Jachipiyadati mirvara samirva, mirvamuy ne samirva. 
Ramunltiy sarivichanichara samirvariy jityunu vichavay.", + "metadata": { + "languages": [ + "hrv", + "tur", + "ind" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "b1da3b28878be3ee9c9045f0c9223c84", + "text": "Yakut \u0414\u044c\u043e\u043d \u0431\u0430\u0440\u044b\u0442\u0430 \u0431\u044d\u0439\u044d \u0441\u0443\u043e\u043b\u0442\u0430\u0442\u044b\u0433\u0430\u0440 \u0443\u043e\u043d\u043d\u0430 \u0431\u044b\u0440\u0430\u0430\u0431\u044b\u0433\u0430\u0440 \u0442\u044d\u04a5 \u0431\u0443\u043e\u043b\u0430\u043d \u0442\u04e9\u0440\u04af\u04af\u043b\u043b\u044d\u0440. \u041a\u0438\u043d\u0438\u043b\u044d\u0440 \u0431\u0430\u0440\u044b \u04e9\u0440\u043a\u04e9\u043d \u04e9\u0439\u0434\u04e9\u04e9\u0445, \u0441\u0443\u043e\u0431\u0430\u0441\u0442\u0430\u0430\u0445 \u0431\u0443\u043e\u043b\u0430\u043d \u0442\u04e9\u0440\u04af\u04af\u043b\u043b\u044d\u0440, \u0443\u043e\u043d\u043d\u0430 \u0431\u044d\u0439\u044d \u0431\u044d\u0439\u044d\u043b\u044d\u0440\u0438\u0433\u044d\u0440 \u0442\u044b\u043b\u0433\u0430 \u043a\u0438\u0438\u0440\u0438\u043d\u0438\u0433\u044d\u0441 \u0431\u044b\u04bb\u044b\u044b\u043b\u0430\u0440\u0430 \u0434\u043e\u0495\u043e\u0440\u0434\u043e\u04bb\u0443\u0443 \u0442\u044b\u044b\u043d\u043d\u0430\u0430\u0445 \u0431\u0443\u043e\u043b\u0443\u043e\u0445\u0442\u0430\u0430\u0445.", + "metadata": { + "languages": [ + "rus" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "53f4d4779755796c4b53e9945f211ced", + "text": "Yanesha\u02bc Allohueney 
\u00f1e\u00f1tey arrom\u00f1atey att\u0303o ye'\u00f1alletyesa arr patsro e'\u00f1e att\u0303ecma cohuen yesherb\u0303a'yen. \u00d1am\u0303a yechyen allpon derechos att\u0303och e'\u00f1ech cohueno'tsa'yeney arr patsro. \u00d1am\u0303a allohuen att\u0303ecma yechyen alloch yoct\u0303ape' chyen cohuen \u00f1am\u0303a ye\u00f1otyen yeyoc\u0308hro \u00f1e\u00f1t \u0303e'ne pocte' enten ache\u00f1enesha' \u00f1am\u0303a \u00f1e\u00f1t \u0303ama pocteye' enteneto. Ye\u00f1ote\u00f1 a\u00f1 poctetsa e'\u00f1e yemo'nashe\u00f1 yep\u0303annena ama't ora allohuen allpon ache\u00f1enesha' \u00f1e\u00f1t \u0303a\u00f1e patsro'tsa'yeney.", + "metadata": { + "languages": [ + "spa", + "eng" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "1484d1c7c562268257922f9f0522d183", + "text": "Yanomam\u00f6 K\u00f5mi th\u00eb p\u00eb r\u00eb p\u00ebripraw\u00eb r\u00eb piy\u00ebk\u00ebi, he usukuw\u00eb th\u00eb p\u00eb keprou ai th\u00eb \u00e3 r\u00ebamaih\u00e3 no \u00e3 heparohow\u00eb, totihitaw\u00eb th\u00eb p\u00eb ri\u00e3 r\u1ebd thaiwehei hami, th\u00eb p\u00eb puhi tao k\u00e3i p\u00ebrihiw\u00ebha, th\u00eb p\u00eb puhi k\u00e3i katehew\u00ebha haw\u00eb kama th\u00eb p\u00eb mashi sh\u0129ro p\u00ebrihimop\u00eb.", + "metadata": { + "languages": [ + "sqi" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "f0d349994b009758a91544c530c1cdd1", + "text": "Yao Wandu wosope akasapagwa ni ufulu ni uchimbichimbi wakulandana. 
Asapagwa ni lunda, niwakupakombola ganisya, m'yoyo kukusosekwa kuti mundu jwalijose am'woneje mundu jwimwe mpela mlongomjakwe.", + "metadata": { + "languages": [ + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "6e2772e24613e482dbe3ec725643ea7a", + "text": "Yapese Gubine gidii mani gargeleg nga faileng nibapuf matt\u02bcawen nge rogon. Bay laniyan nipii e nam, ere ngauda ted matt\u02bcaawen e chaa niba chugur ngoded nimod walag dad.", + "metadata": { + "languages": [ + "tgl", + "cym" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "dd0ec8c9f26cfc60d56857c55e78705f", + "text": "Yiddish, Eastern \u05d9\u05e2\u05d3\u05e2\u05e8 \u05de\u05e2\u05e0\u05d8\u05e9 \u05f0\u05e2\u05e8\u05d8 \u05d2\u05e2\u05d1\u05f1\u05e8\u05df \u05e4\u05bf\u05e8\u05f2\u05b7 \u05d0\u05d5\u05df \u05d2\u05dc\u05f2\u05b7\u05da \u05d0\u05d9\u05df \u05db\u05bc\u05d1\u05bf\u05d5\u05d3 \u05d0\u05d5\u05df \u05e8\u05e2\u05db\u05d8. 
\u05d9\u05e2\u05d3\u05e2\u05e8 \u05f0\u05e2\u05e8\u05d8 \u05d1\u05d0\u05b7\u05e9\u05d0\u05b8\u05e0\u05e7\u05df \u05de\u05d9\u05d8 \u05e4\u05bf\u05d0\u05b7\u05e8\u05e9\u05d8\u05d0\u05b7\u05e0\u05d3 \u05d0\u05d5\u05df \u05d2\u05e2\u05f0\u05d9\u05e1\u05df; \u05d9\u05e2\u05d3\u05e2\u05e8 \u05d6\u05d0\u05b8\u05dc \u05d6\u05d9\u05da \u05e4\u05bf\u05d9\u05e8\u05df \u05de\u05d9\u05d8 \u05d0\u05b7 \u05e6\u05f0\u05f2\u05d8\u05df \u05d0\u05d9\u05df \u05d0\u05b7 \u05d2\u05e2\u05de\u05d9\u05d8 \u05e4\u05bf\u05d5\u05df \u05d1\u05e8\u05d5\u05d3\u05e2\u05e8\u05e9\u05d0\u05b7\u05e4\u05bf\u05d8.", + "metadata": { + "languages": [ + "heb" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "33533cecec6c5714680925cbc9d55bb1", + "text": "Yoruba Gbogbo \u00e8n\u00ecy\u00e0n ni a b\u00ed n\u00ed \u00f2m\u00ecnira; iy\u00ec \u00e0ti \u1eb9\u0300t\u1ecd\u0301 k\u1ecd\u0300\u1ecd\u0300kan s\u00ec d\u1ecd\u0301gba. 
W\u1ecd\u0301n n\u00ed \u1eb9\u0300b\u00f9n ti l\u00e0\u00e1k\u00e0y\u00e8 \u00e0ti ti \u1eb9\u0300r\u00ed\u2010\u1ecdk\u00e0n, \u00f3 s\u00ec y\u1eb9 k\u00ed w\u1ecdn \u00f3 m\u00e1a h\u00f9w\u00e0 s\u00ed ara w\u1ecdn g\u1eb9\u0301g\u1eb9\u0301 b\u00ed \u1ecdm\u1ecd \u00ecy\u00e1.", + "metadata": { + "languages": [ + "vie" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "263ae4a61b51cca14085f92de5a8cfa5", + "text": "Yukaghir, Northern \u041a\u04e9\u0434\u044d\u04a5 \u0442\u044d\u043d - \u043d\u044c\u0438\u0434\u0438\u0442\u044d \u0431\u0430\u043d\u0434\u044c\u044d \u043f\u0430\u0440\u0430\u051d\u0430\u0430\u043d\u044c\u044d\u0440\u044d\u04a5 \u0442\u0443\u0434\u044d \u0447\u0443\u04a5\u0434\u044d\u043d \u043d\u044c\u0438\u043b\u0434\u044c\u0438\u043b\u044d\u043a \u044d\u043d\u043d\u0443\u043b\u04a5\u0438\u043d\u044c-\u043c\u044d\u0434\u044c\u0443\u043e\u043b\u043d\u0443\u043d\u0438. 
\u041a\u04e9\u0434\u044d\u04a5 \u044d\u043d\u043c\u0443\u043d \u0447\u0443\u043d\u0434\u044d \u043c\u044d \u043b\u044c\u044d\u0439, \u0442\u0430\u0430\u0442\u043b\u044c\u044d\u0440 \u043b\u0443\u043a\u0443\u043d\u0434\u044c\u0438\u0438 \u043d\u044c\u0438\u043d\u044d\u043c\u0434\u044c\u0438\u0439\u0438\u043b\u043f\u044d \u0434\u0438\u0442\u044d \u044d\u043d\u043d\u0443\u0439\u0443\u043e\u043b-\u043c\u043e\u0440\u0430\u051d\u043d\u044c\u044d\u04a5\u0438.", + "metadata": { + "languages": [ + "rus" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "UncategorizedText", + "element_id": "5d93ef013b9a5b75709657ba49153ed9", + "text": "Z\u00e1paro Kawiriaja kayapuina ichaukui ta nuka pucha panicha kupanimajicha cha nuka nishima ikicha kiniana panicha tamanuka kanata ikimajicha.", + "metadata": { + "languages": [ + "swa" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "7d1772a7cde57cf4033fb6ecd38d611b", + "text": "Zapotec, G\u00fcil\u00e1 Ra'ta ra bu:unny ra:aaly liebr c\u00ebhnn te'bloh deree'ch c\u00ebhnn dignidaa. 
Ra:alyne:erih gahll ri:e:eny c\u00ebhnn saalyb, chiru' na:a pahr ga:annza'crih loh sa'rih.", + "metadata": { + "languages": [ + "cym", + "eng", + "ind", + "ita" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "efe41cb241efcd0774cf2f9bd328b778", + "text": "Zapotec, Miahuatl\u00e1n Diti mien ndied xa yent kuan nkie xa nak rieti xa diba xa rola.", + "metadata": { + "languages": [ + "afr", + "slk", + "cat", + "ind", + "som" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "b1bf6eb1c62dbb55df63d0dcd8595d2a", + "text": "Zarma Fayanka kulu no si adamayzey nda care game ra i burcintara nda i alhakey cediraw kayandiya\u014b fondo ra da i na i hay. I gonda lakkal, nda laasaabu, ka\u014b ga na\u014b i ma baafunay \u0272ayzetaray haali ra.", + "metadata": { + "languages": [ + "som" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "1c60afd50d137ef937a1579b3ead3a1d", + "text": "Zhuang, Yongbei Boux boux ma daengz lajmbwn couh miz cwyouz, cinhyenz caeuq genzli bouxboux Bingzdaengj. 
gyoengq vunz miz lijsing caeuq liengzsim, wngdang daih gyoengq de lumj beixnuengx ityiengh.", + "metadata": { + "languages": [ + "nld", + "ind", + "eng" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "ba76674b7cb1ad279f93a7027afa6ced", + "text": "Zulu Bonke abantu bazalwa bekhululekile belingana ngesithunzi nangamalungelo. Bahlanganiswe wumcabango nangunembeza futhi kufanele baphathane ngomoya wobunye.", + "metadata": { + "languages": [ + "swa", + "ind", + "tgl" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + }, + { + "type": "UncategorizedText", + "element_id": "3e64942cf704c27071b652681de4124b", + "text": "------", + "metadata": { + "languages": [ + "eng" + ], + "filetype": "text/plain", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/example-docs/language-docs/UDHR_first_article_all.txt" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } + } +] \ No newline at end of file diff --git a/test_unstructured_ingest/expected-structured-output/local-single-file/UDHR_first_article_all.txt.json b/test_unstructured_ingest/expected-structured-output/local-single-file/UDHR_first_article_all.txt.json deleted file mode 100644 index 845d3f46de..0000000000 --- a/test_unstructured_ingest/expected-structured-output/local-single-file/UDHR_first_article_all.txt.json +++ /dev/null @@ -1,10298 +0,0 @@ -[ - { - "element_id": "f604d94aa69cf0073e13e3be1dced533", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] - }, - "text": "Universal Declaration of Human Right - First article, all languages", - "type": "Title" - }, - { - "element_id": "f84bbc479d5bebf6b98c016e14d666d1", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] - }, - "text": "© 1996 – 2009 The Office of the High Commissioner for Human Rights", - "type": "UncategorizedText" - }, - { - "element_id": "1453c80530ef11712374570a086dbd64", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] - }, - "text": "This plain text version prepared by the \"UDHR in Unicode\" project, https://www.unicode.org/udhr.", - "type": "NarrativeText" - }, - { - "element_id": "a2984ec4b6a45274d85da727619ff365", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "deu", - "fra" - ] - }, - "text": "------ (Bizisa) Novdiex nongv liex hufniv dav zer nier, zunxyanr niex qianrlir garhaf hufniv dav zer nier. 
Gixzex livxinf niex lianrxinx xief, xiongxdif guanxxif nier jinxsenr gof dav duifdaif dor.", - "type": "NarrativeText" - }, - { - "element_id": "8088ad87817694565cef5de84a691c31", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] - }, - "text": "(Jinan)", - "type": "Title" - }, - { - "element_id": "84ce1bd66b09ce990ee385a04144822e", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "zho" - ] - }, - "text": "是人都生而自由,在尊严和权利上一律平等。他们赋有理性和良心,并应以弟兄关系的精神相对待。", - "type": "Title" - }, - { - "element_id": "917277f019ba0320fe8bada59c1460ce", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "eng", - "swa" - ] - }, - "text": "(Klau) Chix zox key zifyour, an hu tsunxyanr thungs chianrlif nu phinrten. Tsoxnur nes lishinf thungs leyx o, laiv kuanxshif to tseyr ti cinxsenr shiangxtaif.", - "type": "NarrativeText" - }, - { - "element_id": "d80b950467968341968cda0842fa7086", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "deu", - "eng", - "cym", - "ind" - ] - }, - "text": "(Maiunan) Renr rangf lyeuf xif zifyaot, yur zunxyant ndams chwentlif xif pingtdengl. 
Ter mev lilsingf ndams lyangtsinx, zingxsent gwanxsif vaif nungf.", - "type": "NarrativeText" - }, - { - "element_id": "99faf6e85f4e7e4c4588f87cbaa7598c", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "cat", - "fra", - "ron", - "som" - ] - }, - "text": "(Mijisa) Novzeu nongv lie kylix dav zeu xi, zunyan nie qianlif gahaf kylix dav zeu xi. Gyxzeu livxinf nie lixtolo ca xie, xiongdif guanxif ai jinsen go duifdaif do.", - "type": "NarrativeText" - }, - { - "element_id": "8767af986f37d38b27b0acec306a3a9e", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ind", - "swa" - ] - }, - "text": "(Minjiang, spoken) Renren sen xialai de si ziyou li, zai zunlian ho quanli sang yelue pinden. Tamen fuyou lixin ho liangxin, hai yingai na xiongdi guanxi li jinsen fuxiang duidai.", - "type": "NarrativeText" - }, - { - "element_id": "4c314188c2ec950b39129175c7024070", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ind", - "swa" - ] - }, - "text": "(Minjiang, written) Renren sen er ziyou, zai zunlian ho quanli sang yelue pinden. 
Tamen fuyou lixin ho liangxin, bin yin yi xiongdi guanxi li jinsen xiang duidai.", - "type": "NarrativeText" - }, - { - "element_id": "b8ef759a5a22d6dfa9214df3680e8056", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "eng", - "som", - "cat" - ] - }, - "text": "(Muzzi) Nia ngir ngir ym mexker bbumlix zifyiyr, zunyanr gix jjuanlif alyf bbumlix zzifsof wur. Garxier lixxinf gix xierfux cor yif, xiongxdif guanxif wur jinsenr la lo rim hix.", - "type": "NarrativeText" - }, - { - "element_id": "dfabb35b82a82e16d7cb50d4de138e6f", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "kor" - ] - }, - "text": "(Yeonbyeon) 사람들이 이 세계로 오다가 모두 자유하고, 존엄과 권리이 평동으로 있는다, 그들 리성과 양심이 있눈고, 형제의 정신으로 상호로 치료하 소.", - "type": "NarrativeText" - }, - { - "element_id": "1f41b7646ca8aebc36e8f5ec392481fb", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "rus" - ] - }, - "text": "Abkhaz Дарбанзаалак ауаҩы дшоуп ихы дақәиҭны. Ауаа зегь зинлеи патулеи еиҟароуп. 
Урҭ ирымоуп ахшыҩи аламыси, дара дарагь аешьеи аешьеи реиҧш еизыҟазароуп.", - "type": "NarrativeText" - }, - { - "element_id": "9d266a86305385214455dc0f297330bf", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ind" - ] - }, - "text": "Aceh Bandum manusia lahee bebah merdeka deungon hak ngon martabat nyang sama. Ngon akai taseumikee, ngon atee tameurasa bandum geutanyoe lagee syedara.", - "type": "NarrativeText" - }, - { - "element_id": "8754c8c0fc9122e6c416f7e32418a258", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa", - "ind" - ] - }, - "text": "Achuar-Shiwiar Aints ainauti mash metek nuwanmaya akiinawaitji. Turasha angkan pengker pujusmi tusar akiinawaitji. Aintstikia mash ji nintijai paan nintimratnuitji, turasha pengker aa nu nekaatnuitji. Turasha pase aa nusha nekaatnuitji. 
Turasha ji pataachiri ainaujai pengker nintimtunisar pujaj ina nunisrik chikich ainauj aisha pengker nintimtunisar pujustinuitji.", - "type": "NarrativeText" - }, - { - "element_id": "f741e79073265d5678150ca96cd7732c", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ind" - ] - }, - "text": "Achuar-Shiwiar (1) Penker inintimsamka mash aintsti ankan, metekrin nuya nii penkerin takakui nii akiniamunmaya tu ausamti arantukmau atinuitji mai metekrak.", - "type": "UncategorizedText" - }, - { - "element_id": "7691e5f9dd37d6bc38044534196c1e9f", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "rus" - ] - }, - "text": "Adyghe Цӏыф пстэури шъхьэфитэу, ялъытэныгъэрэ яфэшъуашэхэмрэкӏэ зэфэдэу къалъфы. Акъылрэ зэхэшӏыкӏ гъуазэрэ яӏэшъы, зыр зым зэкъош зэхашІэ азфагу дэлъэу зэфыщытынхэ фае.", - "type": "NarrativeText" - }, - { - "element_id": "5473f0bb6f6cf5e0566fcddbbe1209c6", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "est", - "som", - "ind" - ] - }, - "text": "Afar Karaamat kee garwa wagittaamal seehada inkih gide akkuk, currik taabuke. 
Usun kas kee cissi loonuuh, keenik mariiy mara lih toobokinni kasat gexsitam faxximta.", - "type": "NarrativeText" - }, - { - "element_id": "25b4641fe79e0196b278b85c923b6502", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "afr" - ] - }, - "text": "Afrikaans Alle menslike wesens word vry, met gelyke waardigheid en regte, gebore. Hulle het rede en gewete en behoort in die gees van broederskap teenoor mekaar op te tree.", - "type": "NarrativeText" - }, - { - "element_id": "ec78c0842e210f3ff6e147ad4fd7a4ec", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ind", - "swa", - "est" - ] - }, - "text": "Aguaruna Ashi aents aidauk agkan akinui, betek eme anentsa aentsmasa diyam atanmash, tuja aents anentaibau, aents dutikatasa wakej amu yupichu dutimainnum, tuja ni wakejamun takakush tikish bakushminnum, nuniak tikish aidaujaish shiig yatsuta anmamut ati tusa.", - "type": "NarrativeText" - }, - { - "element_id": "20509f92f090bb4ecf694ea5b01d0921", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "afr", - "pol", - "swa" - ] - }, - "text": "Aja Agbetɔwo pleŋu vanɔ gbɛmɛ ko vovoɖeka gbeswɛgbeswɛ, sɔto amɛnyinyi ko acɛwo gomɛ; wo xɔnɔ susunywin ko jimɛnywi so esexwe. 
Wo ɖo a wɛ nɔvi ɖaɖa wowo nɔnɔwo gbɔ.", - "type": "NarrativeText" - }, - { - "element_id": "f6e32446c48b0755dfcf243a8142d613", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "sqi" - ] - }, - "text": "Albanian, Tosk Të gjithë njerëzit lindin të lirë dhe të barabartë në dinjitet dhe në të drejta. Ata kanë arsye dhe ndërgjegje dhe duhet të sillen ndaj njëri tjetrit me frymë vëllazërimi.", - "type": "NarrativeText" - }, - { - "element_id": "9a69378bfb3e4825a781de59826eff73", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "deu" - ] - }, - "text": "Alemannisch (Elsassisch) Àlli Mensche kùmme mìt de gliche Wìrde ùn Rachte ùff d’Walt. Sie hàn àlli Vernùnft ùn Gewìsse ùn selle mìt Brìederlichkeit de àndere gejjenìwwer hàndle.", - "type": "NarrativeText" - }, - { - "element_id": "d5de29db1ca19f8ac33afb7049462513", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "rus", - "mkd" - ] - }, - "text": "Altai, Southern Ончо улус ак‐јарыкка јайым ла теҥ‐тай тап‐эриктӱ туулат. 
Олор санааукаалу ла чек кӱӱн‐тапту болуп бӱткен ле бой‐бойын карындаш кирези кӧрӧр лӧ јӱрер учурлу.", - "type": "NarrativeText" - }, - { - "element_id": "f878523840c938997e3bbd0cd1f38c38", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "cat", - "spa", - "hrv" - ] - }, - "text": "Amahuaca Tzovan jato jumahaito hinaayamanonmun vacunoxcanquihnucanpu. Tzovan jato zinaayamanonmun vacunoxcanquihnucanpu. Jonitzan derechocavizyahtoxrivimun vacunoxcanquihqui. Quiyoovinin shinanquin hiromaquin jaucuzahavorahquiqui shinantimunhcanquihqui. Vacurazixquicavizhi quiyoovinixjatimunhcanquihnucanpu.", - "type": "NarrativeText" - }, - { - "element_id": "d0963c28613cf0e49ccc8378af7f29b7", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ind" - ] - }, - "text": "Amarakaeri Aya'da aratbut katepi' eka'ta' on'pakpo ka'dik o̱'ne. Nog aratbut huadak o̱'nepo ko̱nigti opudo̱mey huadak mo'e̱. 
Aya'da huadak eka' nopoe̱'dik o̱'ne kenpa'ti dakhuea' eka' nopoe̱'dik o̱'ne kenpa'ti ko̱nig huama'buytaj o 'tihuapokika' ko̱nigti nogo̱meytaj tihuapokika 'dik o̱'ne.", - "type": "NarrativeText" - }, - { - "element_id": "a0647ca94e22e1ec055ae4ae29467e8c", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] - }, - "text": "Amharic", - "type": "Title" - }, - { - "element_id": "8c8d0d9098a83b293045f03fbe07358d", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain" - }, - "text": "የሰው፡ልጅ፡ሁሉ፡ሲወለድ፡ነጻና፡በክብርና፡በመብትም፡እኩልነት፡ያለው፡ነው።፡የተፈጥሮ፡ማስተዋልና፡ሕሊና፡ስላለው፡አንዱ፡ሌላውን፡በወንድማማችነት፡መንፈስ፡መመልከት፡ይገባዋል።", - "type": "Title" - }, - { - "element_id": "b73c2ef2ae45307ba7290eb553e346c0", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "tgl", - "swa" - ] - }, - "text": "Amis Chiyu mahufuchay tu tamlaw, maemin pingdeng ichunyan a kngli. 
Iraay chaira lishing a naay a naay a harateng, pimaulahsha u harateng nu kaka shafa.", - "type": "NarrativeText" - }, - { - "element_id": "ca25bafc08b1e2c757647d046263bceb", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa", - "ind", - "som" - ] - }, - "text": "Arabela Pueyano pa quishacari, puetunu pajaniyajanaa mariyata miishiya maninia, maja sooshiya tamonu. Puetunu pueyajanaari niishitiajaraca, jiuujiaaracanio pueyacua pa taraajenura. Naarate maninia pa jiyanootioore juhua pa tapueyocuaca.", - "type": "NarrativeText" - }, - { - "element_id": "e1a81a0e10a38df3526fc4432de66ad3", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ara" - ] - }, - "text": "Arabic, Standard يولد جميع الناس أحرارًا متساوين في الكرامة والحقوق. 
وقد وهبوا عقلاً وضميرًا وعليهم أن يعامل بعضهم بعضًا بروح الإخاء.", - "type": "NarrativeText" - }, - { - "element_id": "72d099b2761f12d204f35cc85600f8dd", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "est" - ] - }, - "text": "Armenian Բոլոր մարդիկ ծնվում են ազատ ու հավասար իրենց արժանապատվությամբ ու իրավունքներով։ Նրանք ունեն բանականություն ու խիղճ և միմյանց պետք է եղբայրաբար վերաբերվեն։", - "type": "UncategorizedText" - }, - { - "element_id": "38291b67d0eaef665797206e43651164", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ron", - "sqi" - ] - }, - "text": "Aromanian Tuti iatsâli umineshtsâ s-fac liberi shi egali la nâmuzea shi-ndrepturli. Eali suntu hârziti cu fichiri shi sinidisi shi lipseashti un cu alantu sh-si poartâ tu duhlu-a frâtsâljiljei.", - "type": "NarrativeText" - }, - { - "element_id": "6bb51b6b82df3d4800c98e8415754489", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "fin", - "est", - "cat" - ] - }, - "text": "Asháninka Aquempetavacaajeita maaroni atiri. Timatsi aquenqueshirejeitantari maaroni, timatsi amejeitari, ayojeiti paitarica ocameetsati antajeitiri: te oncameetsateji intsaneapitsajeiteero itsipapee. 
Te oncameetsateji imperanajeitee, te oncameetsateji iroashinoncaajeitee, irointi ocameetsati aacameetsatavacaajeitea.", - "type": "NarrativeText" - }, - { - "element_id": "ef818e559e5b9629b3da213d71f6d693", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ita", - "cat", - "fin" - ] - }, - "text": "Ashéninka, Pichis Maaroni atziripayeeni, ovaquera intzimapaaque, eero ocantzi iñaashitacaavaitaityaari iromperanataityaari. Eejatzi oquemitari iroñaaca te apantyaaro amanitashireteri atziri ancanteri: \"Te pirjiperote eeroca, iriima irinta iriitaque ñaaperori\". Eejatzi oquemitari te oncameethate intzime aparoni atziri antayetashityaarone caari ishinetaacairi pashine irantero. Tema maaroni ayotziro ampampithashirvaayeta, ayotziro tsicarica otzimayetzi cameethatatsiri anteri o tsicarica otzimi caariperotatsiri, irootaque ocovaperotantari iroñaaca entacotavacaayetya anquemitacaantanaquero arentzitavacaatyeeyaami ocaaquiini.", - "type": "NarrativeText" - }, - { - "element_id": "5cb0bb4fdc15b35295973bd4a2247bd1", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain" - }, - "text": "Assyrian Neo-Aramaic ܟܠ ܒܪܢܫܐ ܒܪܝܠܗ ܚܐܪܐ ܘܒܪܒܪ ܓܘ ܐܝܩܪܐ ܘܙܕܩܐ. ܘܦܝܫܝܠܗ ܝܗܒܐ ܗܘܢܐ ܘܐܢܝܬ. 
ܒܘܕ ܕܐܗܐ ܓܫܩܬܝ ܥܠ ܐܚܪܢܐ ܓܪܓ ܗܘܝܐ ܒܚܕ ܪܘܚܐ ܕܐܚܢܘܬܐ.", - "type": "NarrativeText" - }, - { - "element_id": "fc37a0c903b4ad45223fa0a367de3b9b", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "spa" - ] - }, - "text": "Asturian Tolos seres humanos nacen llibres y iguales en dignidá y drechos y, pola mor de la razón y la conciencia de so, han comportase hermaniblemente los unos colos otros.", - "type": "NarrativeText" - }, - { - "element_id": "beb1f5ccad6046d907217e2c355a3d20", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] - }, - "text": "Awa", - "type": "Title" - }, - { - "element_id": "545202363638daaa690a4f5b184ad0da", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] - }, - "text": "Cuaiquer", - "type": "Title" - }, - { - "element_id": "4e13c433d775a93f0bb6c40cbb2d5a03", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa", - "ind", - "tgl" - ] - }, - "text": "Aymara, Central Taqpach jaqejh khuskat uñjatatäpjhewa munañapansa, lurañapansa, amuyasiñapansa, ukatwa jilani sullkanípjhaspas ukham uñjasipjhañapawa.", - "type": "UncategorizedText" - }, - { 
- "element_id": "8afc3caab3e458628b6f2efdb46fc6d1", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "rus", - "mkd" - ] - }, - "text": "Azerbaijani, North (Cyrillic) Бүтүн инсанлар ләјагәт вә һүгугларына ҝөрә азад вә бәрабәр доғулурлар. Онларын шүурлары вә виҹданлары вар вә бир-бирләринә мүнасибәтдә гардашлыг руһунда давранмалыдырлар.", - "type": "NarrativeText" - }, - { - "element_id": "6d9f8766b1812e209f1a59654443299c", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "tur" - ] - }, - "text": "Azerbaijani, North (Latin) Bütün insanlar ləyaqət və hüquqlarına görə azad və bərabər doğulurlar. Onların şüurları və vicdanları var və bir-birlərinə münasibətdə qardaşlıq ruhunda davranmalıdırlar.", - "type": "NarrativeText" - }, - { - "element_id": "3681d23b771b9cf26263ab194af3430d", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "som", - "ind" - ] - }, - "text": "Baatonum Ba tɔmbu kpuro marawa ba tii mɔ, ba nɛ, girima ka saria sɔɔ. 
Ba ra bwisiku, ba dasabu mɔ, ma n weene ba n waasinɛ mɛrobisiru sɔɔ.", - "type": "NarrativeText" - }, - { - "element_id": "fdd8c85dd832c6fb407357bf176d958a", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ind" - ] - }, - "text": "Bali Sami manusane sane nyruwadi wantah merdeka tur maduwe kautamaan lan hak-hak sane pateh. Sami kalugrain papineh lan idep tur mangdane pada masawitra melarapan semangat pakulawargaan.", - "type": "NarrativeText" - }, - { - "element_id": "394114d333ed34e0add89b5e9079d474", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "som", - "ind", - "swa" - ] - }, - "text": "Bamanankan Hadamaden bɛɛ danmakɛɲɛnen bɛ bange, danbe ni josira la. Hakili ni taasi b’u bɛɛ la, wa u ka kan ka badenɲasira de waleya u ni ɲɔgɔn cɛ.", - "type": "NarrativeText" - }, - { - "element_id": "31e2922fd7a67918fa2a09744965a970", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "sqi" - ] - }, - "text": "Bamun Pe nâ mvé gú puen nyütu pô te mbe kú ghét ngam pua ngúenengúe mbe te wûme nsebe pua pa mféékêt. 
Pen â ntúm te mbe kú rem ngam pua fabshe ngam, a nshi njîr’ap ne yi nshâne ngétne nga shap pô te wupme pontâ.", - "type": "NarrativeText" - }, - { - "element_id": "c5815bd56d9b0f7114cfa825514698ca", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ind" - ] - }, - "text": "Baoulé Sran mun be ngba, kɛ be wu be ɔ, be ngba be sɛ, fɔndi nun, sran-mmala nun. Be si akundanbu, be si su ɔ fata kɛ sran mun be tran'n, be tran aniaan nun tranlɛ.", - "type": "NarrativeText" - }, - { - "element_id": "f937bd218ac832a520fee7be14b4e89c", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "hrv", - "fin" - ] - }, - "text": "Bari Ŋutu liŋ a yuŋwe kana, jojo i toďiri ko ďekesi ko ti se tokitaki ko ‘börik ko mulökötyo lo toluŋaseran. 
Se a ďoka ko denet na kulya na’but ko narok.", - "type": "NarrativeText" - }, - { - "element_id": "72f3bf8426b189087874ec1b7bd46496", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ind", - "spa", - "nld" - ] - }, - "text": "Basque Gizon-emakume guztiak aske jaiotzen dira, duintasun eta eskubide berberak dituztela; eta ezaguera eta kontzientzia dutenez gero, elkarren artean senide legez jokatu beharra dute.", - "type": "NarrativeText" - }, - { - "element_id": "5ce714cfa1def0c0d951bf7bff485500", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ukr", - "rus" - ] - }, - "text": "Belarusan Усе людзі нараджаюцца свабоднымі і роўнымі ў сваёй годнасці і правах. Яны надзелены розумам і сумленнем і павінны ставіцца адзін да аднаго ў духу брацтва.", - "type": "NarrativeText" - }, - { - "element_id": "f1c956f483b1e3df941f43c5bba31674", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa" - ] - }, - "text": "Bemba Abantu bonse bafyalwa abalubuka nokulingana mu mucinshi nensambu. 
Balikwata amano nokutontonkanya, eico bafwile ukulacita ifintu ku banabo mu mutima wa bwananyina.", - "type": "NarrativeText" - }, - { - "element_id": "bb5acaee87121a890d36cb7afd3ad15a", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ben" - ] - }, - "text": "Bengali সমস্ত মানুষ স্বাধীনভাবে সমান মর্যাদা এবং অধিকার নিয়ে জন্মগ্রহণ করে। তাঁদের বিবেক এবং বুদ্ধি আছে; সুতরাং সকলেরই একে অপরের প্রতি ভ্রাতৃত্বসুলভ মনোভাব নিয়ে আচরণ করা উচিত।", - "type": "UncategorizedText" - }, - { - "element_id": "d5919948b12c6b7e2c5179487170dd51", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "hin" - ] - }, - "text": "Bhojpuri सबहि लोकानि आजादे जम्मेला आओर ओखिनियो के बराबर सम्मान आओर अधिकार प्राप्त हवे। ओखिनियो के पास समझ-बूझ आओर अंत:करण के आवाज होखता आओर हुनको के दोसरा के साथ भाईचारा के बेवहार करे के होखला।", - "type": "UncategorizedText" - }, - { - "element_id": "4b7a06328d8e5cf300c21e03834148ad", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "tgl" - ] - }, - "text": "Bicolano, Central An gabos na tawo ipinangaking may katalinkasan asin parantay sa dignidad asin derechos. 
Sinda gabos tinawan nin pag-isip asin conciencia kaya dapat na makipag-iriba sa lambang saro bilang mga magturugang.", - "type": "NarrativeText" - }, - { - "element_id": "e82a426f8b4696143791e8f79987471d", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "tgl", - "eng", - "ita" - ] - }, - "text": "Bislama Evri man mo woman i bon fri mo ikwol long respek mo ol raet. Oli gat risen mo tingting mo oli mas tritim wanwan long olgeta olsem ol brata mo sista.", - "type": "NarrativeText" - }, - { - "element_id": "09176e19ded6b0ff879ead0799cc2302", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "hun" - ] - }, - "text": "Bora Pámeere ííñújɨri meíjcyame tsá múhójɨ́sɨ́ pañé ɨ́cubáhrádú meíjcyáítyuróne. Pámeere tsahdúré imí meíjcyame mewájyújcatsíñe mépɨ́áábójcatsíiyá tsaatéké éhdɨ́Ȉ́válletúmé éhne múu mépañétúéné nahbémuma meíjcyadu.", - "type": "NarrativeText" - }, - { - "element_id": "5e3ff47fa6202cd3f10a179ea2b898e3", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "mkd" - ] - }, - "text": "Bosnian (Cyrillic) Сва људска бића раћају се слободна и једнака у достојанству и правима. 
Она су обдарена разумом и свијешћу и треба да једно према другоме поступају у духу братства.", - "type": "NarrativeText" - }, - { - "element_id": "8918cf337af35db75c0b7e3a98572814", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "hrv" - ] - }, - "text": "Bosnian (Latin) Sva ljudska bića rađaju se slobodna i jednaka u dostojanstvu i pravima. Ona su obdarena razumom i sviješću i treba da jedno prema drugome postupaju u duhu bratstva.", - "type": "NarrativeText" - }, - { - "element_id": "4f74a58266d23d68a787e2a91434a97d", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "nld", - "cym" - ] - }, - "text": "Breton Dieub ha par en o dellezegezh hag o gwirioù eo ganet an holl dud. Poell ha skiant zo dezho ha dleout a reont bevañ an eil gant egile en ur spered a genvreudeuriezh.", - "type": "NarrativeText" - }, - { - "element_id": "289a54f8ff934baba89545ec92d5b768", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ind", - "tgl" - ] - }, - "text": "Bugis Sininna rupa tau ri jajiangngi rilinoe nappunnai manengngi riasengnge alebbireng . 
Nappunai riasengnge akkaleng, nappunai riasengnge ati marennni na sibole bolena pada sipakatau pada massalasureng.", - "type": "NarrativeText" - }, - { - "element_id": "24a3cf3bd02d17e2f2b065bab51c8e70", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "bul" - ] - }, - "text": "Bulgarian Всички хора се раждат свободни и равни по достойнство и права. Те са надарени с разум и съвест и следва да се отнасят помежду си в дух на братство.", - "type": "NarrativeText" - }, - { - "element_id": "61589cb2ca0346e6af7f49a73b4125b3", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ron", - "cat", - "ita", - "hun" - ] - }, - "text": "Bulu Abiali bod bese, tege ai sesala, bene etie dzia a mis memvende y'enyiñ, dzom dzia etu fili nkóbó, fili ntsogan, fili mboan. 
Ve abiali te, mod ose ayem dze ene abe, dze ene mbeñ asu e mod mbog antoa ai mfi na enyiñ ewulu mezen mene sosoo.", - "type": "NarrativeText" - }, - { - "element_id": "6dbacafdbc68b6ba0689b2d27b2ede49", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain" - }, - "text": "Burmese လူတိုင်းသည် တူညီ လွတ်လပ်သော ဂုဏ်သိက္ခာဖြင့် လည်းကောင်း၊ တူညီလွတ်လပ်သော အခွင့်အရေးများဖြင့် လည်းကောင်း၊ မွေးဖွားလာသူများ ဖြစ်သည်။ ထိုသူတို့၌ ပိုင်းခြား ဝေဖန်တတ်သော ဉာဏ်နှင့် ကျင့်ဝတ် သိတတ်သော စိတ်တို့ရှိကြ၍ ထိုသူတို့သည် အချင်းချင်း မေတ္တာထား၍ ဆက်ဆံကျင့်သုံးသင့်၏။", - "type": "UncategorizedText" - }, - { - "element_id": "7b5c1459fc45a2821c0d05cd98c1996f", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa" - ] - }, - "text": "Bushi Ɓinadamu djabi nitirahinyi an-nafasi, reu bokeu miraŋa amin’ni usheu ndreka haki. Reu teraka ndreka ãkili ndreka hikima, amin’ni zenyi, reu nikulazimu nisi tweraŋa nin-fihavaŋa reu sambi reu.", - "type": "NarrativeText" - }, - { - "element_id": "799ae8db337401700ed035d921a073fc", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa" - ] - }, - "text": "Candoshi-Shapra Iy tpotsini ichigoroni kis tamam zadkini, vatam tpotsiniva. Vatam ichigoroni magini tarova; ashiriya chinakaniya. 
Ashirocha, zovalliatsich tamaparia-ashiros sanpata chinagtsa atiniya.", - "type": "NarrativeText" - }, - { - "element_id": "b2fc5040743a2c1361f561dfc9c2e365", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "cat", - "som" - ] - }, - "text": "Caquinte Aquejetavacaajiaca maasano caquinte. Chooca aquenquejantaca maasano, chooca amejigaca, atsajiaque taaca opajitapae ocameetsataque antajiguica. Tee oncameetsateji iromperaperanajicaji, tee oncameetsateji irogashinoncajajiacaji. Jero cameetsatatsica aavacaj aiaquempa.", - "type": "NarrativeText" - }, - { - "element_id": "296f3e08ce32c544b7ce3922abf32c6c", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "sqi", - "cat", - "som" - ] - }, - "text": "Cashibo-Cacataibo Ui uni cara 'iti icë axbi ca bëtsi unibë gobiernonën iscëx sënën ití icën. Ui cara ain tita ain papa 'iaxa quixun sinanquinma ca gobiernonën sinancëx ax bëtsibë sënën 'icën. Camaxunbi ca sinanti 'unanin. Camaxunbi ca añu ñu ati cara asábi 'icën, añu ñu 'ati cara 'aisama 'icë quixun 'unanti 'icën. Usa 'ain ca camaxbi ain xucënbë 'icësaribiti nuiananti 'icën.", - "type": "NarrativeText" - }, - { - "element_id": "db88e492d6becfc01c06289965d5b5b4", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ind" - ] - }, - "text": "Cashinahua Yudabu dasibi jabiaskadi akin, xinantidubuki. 
Javen taea jau jaibunamenunbunven.", - "type": "NarrativeText" - }, - { - "element_id": "75c025da4f4c95d2f428dc459b739bef", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "cat" - ] - }, - "text": "Catalan-Valencian-Balear Tots els éssers humans neixen lliures i iguals en dignitat i en drets. Són dotats de raó i de consciència, i han de comportar-se fraternalment els uns amb els altres.", - "type": "NarrativeText" - }, - { - "element_id": "6bc8b32628a3759388373fd8872ce83d", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "tgl" - ] - }, - "text": "Cebuano Ang tanang katawhan gipakatawo nga may kagawasan ug managsama sa kabililhon. Sila gigasahan sa salabutan ug tanlag og mag-ilhanay isip managsoon sa usa'g-usa diha sa diwa sa ospiritu.", - "type": "NarrativeText" - }, - { - "element_id": "346a128271cb055071a9b9d4548d0488", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ind", - "swa" - ] - }, - "text": "Chachi Naaju chachilla bain mu' chachilla bain na kayatu tichiba bulla jutyu naakendya'ba kenu deechu taa na kayamu deju, tsenminya,naaju juñu bain ne tsaave ti', uukavinu jutyu naa, tideechu juuchi bain, mubain mubain tsaren dejuve, tsenmin shilli pensangenu pude deju'. 
mitya, tsenr)1in ura' kendu bain ura' kendyu' bain mide' mitya muba mu bain veta' veta' ura' keewaawaa kenuu dejuve.", - "type": "NarrativeText" - }, - { - "element_id": "0b1ae7cf56e3557ef9acecc99806172b", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain" - }, - "text": "Chakma 𑄝𑄬𑄇𑄴 𑄟𑄚𑄪𑄌𑄴 𑄚𑄨𑄢𑄨𑄞𑄨𑄣𑄨 𑄥𑄧𑄁 𑄃𑄨𑄌𑄴𑄎𑄮𑄖𑄴 𑄃𑄳𑄃 𑄃𑄇𑄴𑄇𑄥𑄁 𑄚𑄨𑄚𑄬𑄭 𑄎𑄧𑄚𑄴𑄟𑄚𑄴𑅁 𑄖𑄢𑄢𑄴 𑄃𑄬𑄘 𑄃𑄳𑄃 𑄝𑄪𑄖𑄴𑄙𑄨 𑄃𑄊𑄬; 𑄥𑄬𑄚𑄧𑄖𑄳𑄠𑄴 𑄝𑄬𑄇𑄴𑄅𑄚𑄧𑄢𑄴 𑄃𑄬𑄇𑄴𑄎𑄧𑄚𑄴 𑄃𑄢𑄬𑄇𑄴 𑄎𑄧𑄚𑄧𑄢𑄴 𑄛𑄳𑄢𑄧𑄖𑄨 𑄉𑄧𑄟𑄴 𑄘𑄮𑄣𑄴 𑄌𑄨𑄘𑄳𑄠𑄬 𑄚𑄨𑄚𑄬𑄭 𑄌𑄧𑄣𑄚 𑄅𑄪𑄌𑄨𑄖𑄴𑅁", - "type": "UncategorizedText" - }, - { - "element_id": "72d0a5cc00f21c3695d03aa4624b3452", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "spa", - "ind" - ] - }, - "text": "Chamorro Todo taotao siha man mafanago libertao yan pareho gi dignidad yan derecho siha, man manae siha hinaso yan consiencia yan debe de ufatinas contra uno yan otro gi un espiritun chumelo.", - "type": "NarrativeText" - }, - { - "element_id": "87e7fb3e75a3a124c8e4bce8573a5dd1", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "tgl", - "swa", - "sqi" - ] - }, - "text": "Chayahuita Ya'ipi piyapinpoa' capini noya ninosorocaso' ya'huërin. Ya'ipinpoa' yonquirëhua'. Noya nicacaso' nitotërëhua'. 
Napoaton iyanpoa pochin ninosorocaso' ya 'huërin.", - "type": "NarrativeText" - }, - { - "element_id": "03ea2a4dd341c6cdd4c3ddd814721290", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain" - }, - "text": "Cherokee (cased) Ꮒꭶꮣ ꭰꮒᏼꮻ ꭴꮎꮥꮕꭲ ꭴꮎꮪꮣꮄꮣ ꭰꮄ ꭱꮷꮃꭽꮙ ꮎꭲ ꭰꮲꮙꮩꮧ ꭰꮄ ꭴꮒꮂ ꭲᏻꮎꮫꮧꭲ. Ꮎꮝꭹꮎꮓ ꭴꮅꮝꭺꮈꮤꮕꭹ ꭴꮰꮿꮝꮧ ꮕᏸꮅꮫꭹ ꭰꮄ ꭰꮣꮕꮦꮯꮣꮝꮧ ꭰꮄ ꭱꮅꮝꮧ ꮟᏼꮻꭽ ꮒꮪꮎꮣꮫꮎꮥꭼꭹ ꮎ ꮧꮎꮣꮕꮯ ꭰꮣꮕꮩ ꭼꮧ.", - "type": "NarrativeText" - }, - { - "element_id": "09009508dba31db1f130bf24d409614e", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain" - }, - "text": "Cherokee (uppercase) ᏂᎦᏓ ᎠᏂᏴᏫ ᎤᎾᏕᏅᎢ ᎤᎾᏚᏓᎴᏓ ᎠᎴ ᎡᏧᎳᎭᏉ ᎾᎢ ᎠᏢᏉᏙᏗ ᎠᎴ ᎤᏂᎲ ᎢᏳᎾᏛᏗᎢ. ᎾᏍᎩᎾᏃ ᎤᎵᏍᎪᎸᏔᏅᎩ ᎤᏠᏯᏍᏗ ᏅᏰᎵᏛᎩ ᎠᎴ ᎠᏓᏅᏖᏟᏓᏍᏗ ᎠᎴ ᎡᎵᏍᏗ ᏏᏴᏫᎭ ᏂᏚᎾᏓᏛᎾᏕᎬᎩ Ꮎ ᏗᎾᏓᏅᏟ ᎠᏓᏅᏙ ᎬᏗ.", - "type": "NarrativeText" - }, - { - "element_id": "ca845e694f20fb1947def444cd1f59f9", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa", - "fin" - ] - }, - "text": "Chickasaw Himmaka' nittakookano hattak yokasht toksalicha'nikat ki'yo. 
Hattak mó̱makat ittíllawwi bíyyi'kacha nanna mó̱maka̱ ittibaachaffa'hitok.", - "type": "NarrativeText" - }, - { - "element_id": "2106a38ef8d9320b2c785f5e0827b8ff", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ind" - ] - }, - "text": "Chin, Falam Mikip in bangrep ii zalen nak le sunlawih nak thawn, bangrep in covo nei in, asuak mi kan si. Anmah in hleidan thei nak fim nak le nuncan neih thei nak ruah nak nei ii, pakhat le pakhat duh dawt nak, pawl awk nak nei ding asi.", - "type": "NarrativeText" - }, - { - "element_id": "83538732daca0377dcf87942c7e10b41", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ind", - "tgl" - ] - }, - "text": "Chin, Haka Mi vialte hna cu zalong te, ai tluk te le upat tihzah awktlak le thiltikhawhnak tinvo a ngei in a chuak mi kan si dih. Minung cu a chia a tha thleidang khomi ruahnak le theihthiamnak ziaza tha a ngeimi kan si caah pakhat le pakhat dawtnak he i pehtlaihnak le i hawikawmhnak ngeih ding kan si.", - "type": "NarrativeText" - }, - { - "element_id": "2dc80f80340d36e85a551642585e592a", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "tgl", - "ind" - ] - }, - "text": "Chin, Matu Thlangboeih he rhimomna, vanpitna, yalpona hamhmoel ka tawn thlang la cuun la ng’om u. 
Thlanghing he athae-then paekboe thaina neh yakming thaina moeiboe ka tawn thlang la n’om u dong ah khat neh khat lungvat na neh thloehlan voekhlak u thae ham om.", - "type": "NarrativeText" - }, - { - "element_id": "78bcf1425ec172c6ded201b1814121a0", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ind" - ] - }, - "text": "Chin, Tedim Mihingte khempeuh in thupitak leh thuneihna tawh suakta tak leh akibang in kipiang ciat ahi hi. Asia leh apha khentel thei thungaihsutna nei ciat uh ahihman in khat leh khat sanggam unau lungsim tawh kithuah khop ding hi.", - "type": "NarrativeText" - }, - { - "element_id": "66e7bb8d8db209646cecea79ecf23f89", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "hrv" - ] - }, - "text": "Chinantec, Chiltepec Lejɨ̈ ni sou tsa lisia̱ ija̱a sia ikou' ne kojo̱ jï ne juso̱ ne jmo' re ju i sɨ' jmo' nö sala̱ ne sasno.", - "type": "NarrativeText" - }, - { - "element_id": "b29e38dc8292efa10880271bbb145f07", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "fin", - "cat" - ] - }, - "text": "Chinantec, Ojitlán La juu dsa lu siä –Dsa kö ñi ba dsa, nía kö ni' ba na lu' dsa e dsa tï é li jnia' roö'.", - "type": "NarrativeText" - }, - { - "element_id": "621ff48c1c000fb4631b736f4a775ada", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - 
"url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] - }, - "text": "Chinese, Gan", - "type": "Title" - }, - { - "element_id": "be604439089a8fedd5abdc4d81187599", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "zho" - ] - }, - "text": "人人生而自由,在志向跟权利上一律平等。渠们赋有理性跟良心,并理当以弟兄义气相对待。", - "type": "Title" - }, - { - "element_id": "2c7b5f9c25a7c50b8ff18bf830c23d94", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] - }, - "text": "Chinese, Hakka", - "type": "Title" - }, - { - "element_id": "05e53430ff030465078e511efc0de0b2", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "zho", - "kor" - ] - }, - "text": "人人生而自由,在尊严同权利上一律平等。佢丁人赋有理性同好心田,并应以兄弟关系个精神相对待。", - "type": "Title" - }, - { - "element_id": "ebfd1fe8c2a3f89dc902d868e38f4ee4", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] - }, - "text": "Chinese, Jinyu", - "type": "Title" - }, - { - "element_id": "549cb1628fe3e0cafb78cd92f08f0554", - "metadata": { - 
"data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "zho", - "kor" - ] - }, - "text": "人人生而自由,在尊严和权利上一律平等。他们赋有理性和良心,并应以弟兄关系的精神相对待。", - "type": "Title" - }, - { - "element_id": "c49bd62da37860cce7a77fe260bebf2b", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] - }, - "text": "Chinese, Mandarin (Beijing)", - "type": "Title" - }, - { - "element_id": "bf0df306ed131c2adf4243ded3865e6a", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "zho", - "kor" - ] - }, - "text": "人人生而自由,挨尊严和权利上一刬平等。他们趁理性和良心,并应以一个座儿的精神相对待。", - "type": "Title" - }, - { - "element_id": "7c55aa8729281fbd5c6fc4c3ab62d9a1", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] - }, - "text": "Chinese, Mandarin (Guiyang)", - "type": "Title" - }, - { - "element_id": "ba1e57780fc9d286c63be7e8e73e3c2e", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "zho" - ] - }, - "text": 
"人人生而自由,在尊严和权利上一律是平等的。他们赋有理性和良心,并应以兄弟关系的精神相互对待。", - "type": "Title" - }, - { - "element_id": "84fcd3db2129561d730906d791a0b309", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] - }, - "text": "Chinese, Mandarin (Harbin)", - "type": "Title" - }, - { - "element_id": "bdf44eafec897495cf404ac895e41ee3", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "zho" - ] - }, - "text": "人人生而自由,在尊严和权利之上一律平等。他们赋有理性和良心,并应以哥们弟兄的精神相对待。", - "type": "Title" - }, - { - "element_id": "d03595a53957527885710152809f751a", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] - }, - "text": "Chinese, Mandarin (Nanjing)", - "type": "Title" - }, - { - "element_id": "a96206ba057e6ac6c0fdb4c87d21a1c9", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "zho", - "kor" - ] - }, - "text": "大家生而自由,在尊严告权利上头一律平等。他们赋有理性告良心,并该派以兄弟关系的精神相对待。", - "type": "Title" - }, - { - "element_id": "f25715562a899772b8aed3cfb3962ec1", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] - }, - "text": "Chinese, Mandarin (Simplified)", - "type": "Title" - }, - { - "element_id": "c185fc727614ade15888d1e8c9a00c4d", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "zho", - "kor" - ] - }, - "text": "人人生而自由,在尊严和权利上一律平等。他们赋有理性和良心,并应以兄弟关系的精神相对待。", - "type": "Title" - }, - { - "element_id": "2d117756d05c842183baab2b6b9ec4be", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] - }, - "text": "Chinese, Mandarin (Tianjin)", - "type": "Title" - }, - { - "element_id": "9e8a7703ae5139a2870b236cfa54cfd6", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "zho" - ] - }, - "text": "人个顶个生而自由,在尊严和权利上般儿般儿大。他们趁理性和良心,并应以兄弟关系的精神相对待。", - "type": "Title" - }, - { - "element_id": "88b1f3c15657c3079cb323e733d61a60", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] - }, - "text": "Chinese, Mandarin (Traditional)", - "type": "Title" - }, - { - "element_id": 
"0e1d6539c2001d2ba8e3188f43b83f7f", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "kor", - "zho" - ] - }, - "text": "人人生而自由,在尊嚴和權利上一律平等。他們賦有理性和良心,並應以兄弟關係的精神相對待。", - "type": "Title" - }, - { - "element_id": "d4e689db8d161e7f3c8d633b36d869ad", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] - }, - "text": "Chinese, Min Nan", - "type": "Title" - }, - { - "element_id": "48659e28c3b04b69caeaa16aded28f58", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "zho", - "kor" - ] - }, - "text": "人人生而自由,在尊严合权利上一律平等。因赋有脾胃合道行,并着以兄弟关系的精神相对待。", - "type": "Title" - }, - { - "element_id": "7f3fbf32ccbb91a3f12ae80cec59c7cb", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] - }, - "text": "Chinese, Wu", - "type": "Title" - }, - { - "element_id": "c8272c39e78f413c6902b423da92287d", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "zho", - "kor" - 
] - }, - "text": "人人生而自由,拉尊严脱仔权利上一律平等。伊拉有理性脱仔良心,并应以兄弟关系个精神相对待。", - "type": "Title" - }, - { - "element_id": "4df4815855b4baec40b68107613e8d88", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] - }, - "text": "Chinese, Xiang", - "type": "Title" - }, - { - "element_id": "7d70d884e74db8b4302ba0589166c634", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "zho" - ] - }, - "text": "人人生而自由,在尊严和权利上一律平等。他们赋有理性和良心,在得以兄弟关系的精神相对待。", - "type": "Title" - }, - { - "element_id": "9a0ba50e87478d37004278ca8d36dd31", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] - }, - "text": "Chinese, Yue", - "type": "Title" - }, - { - "element_id": "932a20508f1be7b3c6fa54b0f9e46f14", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "kor", - "zho" - ] - }, - "text": "人人生而平等,喺尊严同埋权利上一律平等。佢哋有理性同埋良心,而且应当以兄弟关系嘅精神相对待。", - "type": "Title" - }, - { - "element_id": "ce974d0bb96c5c69b851dee0186a878b", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa" - ] - }, - "text": "Chokwe Mwese yoze masemuka katela ukulungunga ulengunga ulemu nyi vumbi eswe ci mwikha. Eswe kalingile kupwa nyi usambe nyi mangana nyi kuhasa kulimika nyumwe nyi mukwo nyi kulita nyi mbunge ya ulemu wa utu.", - "type": "NarrativeText" - }, - { - "element_id": "f535b754beb32eadd599ba0868093417", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa" - ] - }, - "text": "Chokwe (Angola) Athu eswe kakusemuka ngwe akwo, ku vumbi nyi hakusakula.Kali nyi mana,mba mahasa kulinga umwu hali mukwo nyi espiritu ya kuli kuasa.", - "type": "NarrativeText" - }, - { - "element_id": "7ddc431e7188689370bfef9102ab3594", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swe", - "eng" - ] - }, - "text": "Chuukese Esap wor och mettoch epwe appeti aramas seni fansoun ar uputiu non ar tufich me rait. 
Ar ekiek epwe mecheres o esap pet ren och sakkun mettoch pun ir repwe nonnomfengen non kinamwe o pwipwi annim.", - "type": "NarrativeText" - }, - { - "element_id": "93683f443b25a57d05bfb3b2ab1533a8", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "rus", - "bul" - ] - }, - "text": "Chuvash Пур халӑх та уйрӑм пурӑнма пӗр тан праваллӑ. Ҫак правапа усӑ курса вӗсем хӑйсен политика статусне ирӗклӗн туса хураҫҫӗ, экономика, общество тата культура енӗпе ирӗклӗн аталанаҫҫӗ. Патшалӑхсен ҫак правӑна хисеплемелле, территори пӗр пӗтӗмлӗхӗн принципӗ унпа усӑ курма пӗр енлӗн чарса тӑракан чӑрмав пулмалла мар.", - "type": "NarrativeText" - }, - { - "element_id": "ec45c354f720412810f1027f2aba4a1b", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "fin", - "swa" - ] - }, - "text": "Colorado Piyanle tsa'chila, mankarijun, junshi manta tan, in tobi jaminlajoyoe, titi mi, tenka kano min, junshi, tsa'chila tala, sen jono min.", - "type": "UncategorizedText" - }, - { - "element_id": "7829c582fafb0be79ca15885a9ffe253", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa" - ] - }, - "text": "Comorian, Maore Wanadamu piya udzalwa huru tsena sawa ha ufahari na ha haki. 
Na wawo wana ãkili na hisi, esa ilazimu wadzivhinge na wanyao ha fikira ya unanya.", - "type": "NarrativeText" - }, - { - "element_id": "829255cbbb3bc358933cca778550205a", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa" - ] - }, - "text": "Comorian, Ngazidja Wo wanadamu piya wo uzalwa na uhuriya na usawa waki undru na uhaki. Wo upwawa ankili na urambuzi hayizo yilazimu warwaliyane hazitrendwa na fikira zaki unanya.", - "type": "NarrativeText" - }, - { - "element_id": "8aea2ff9710269cb8bdfd811de62b8cd", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ita" - ] - }, - "text": "Corsican Nascinu tutti l’omi libari è pari di dignità è di diritti. 
Pussedinu a raghjoni è a cuscenza è li tocca ad agiscia trà elli di modu fraternu.", - "type": "NarrativeText" - }, - { - "element_id": "7174e554bd11372c5e339ba08b9881ab", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain" - }, - "text": "Cree, Swampy ᒥᓯᐌ ᐃᓂᓂᐤ ᑎᐯᓂᒥᑎᓱᐎᓂᐠ ᐁᔑ ᓂᑕᐎᑭᐟ ᓀᐢᑕ ᐯᔭᑾᐣ ᑭᒋ ᐃᔑ ᑲᓇᐗᐸᒥᑯᐎᓯᐟ ᑭᐢᑌᓂᒥᑎᓱᐎᓂᐠ ᓀᐢᑕ ᒥᓂᑯᐎᓯᐎᓇ᙮ ᐁ ᐸᑭᑎᓇᒪᒋᐠ ᑲᑫᑕᐌᓂᑕᒧᐎᓂᓂᐤ ᓀᐢᑕ ᒥᑐᓀᓂᒋᑲᓂᓂᐤ ᓀᐢᑕ ᐎᒋᑴᓯᑐᐎᓂᐠ ᑭᒋ ᐃᔑ ᑲᓇᐗᐸᒥᑐᒋᐠ᙮", - "type": "UncategorizedText" - }, - { - "element_id": "952f38639569c0ef489cc6ebb4e809a7", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "tur" - ] - }, - "text": "Crimean Tatar Bütün insanlar serbestlik, menlik ve uquqlarda musaviy olıp dünyağa keleler. Olar aqıl ve vicdan saibidirler ve biri-birilerinen qardaşçasına munasebette bulunmalıdırlar", - "type": "NarrativeText" - }, - { - "element_id": "2ed33ba01de24e402f5963e9b2b56328", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ind", - "por" - ] - }, - "text": "Crioulo, Upper Guinea Tudu pekaduris ta padidu libri i igual na balur suma na diritus. 
Suma e dadu kapasidadi di pensa, e tene tambi konsiensia, e dibi di trata ñutru suma ermons.", - "type": "NarrativeText" - }, - { - "element_id": "8eb33fe9d9a2a68e6a146718f7b97d24", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ita", - "por", - "ron", - "fra" - ] - }, - "text": "Crioulo, Upper Guinea (008) Tudu pecadur padidu livre, ninguin ca más ninguin, tudu djusta, tudu tem mesmu diritu. Tudu quin qui padidu, tem si roçon, cu si manera di pensa. Na metadi di utrus I díbidi fassi cussas cu ermondadi.", - "type": "NarrativeText" - }, - { - "element_id": "9a87923b32ddc3eb20ab733920e58198", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "hrv" - ] - }, - "text": "Croatian Sva ljudska bića rađaju se slobodna i jednaka u dostojanstvu i pravima. Ona su obdarena razumom i sviješću i treba da jedno prema drugome postupaju u duhu bratstva.", - "type": "NarrativeText" - }, - { - "element_id": "0666ab63ad7ac65ec7290cb18d27749d", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ces" - ] - }, - "text": "Czech Všichni lidé rodí se svobodní a sobě rovní co do důstojnosti a práv. 
Jsou nadáni rozumem a svědomím a mají spolu jednat v duchu bratrství.", - "type": "NarrativeText" - }, - { - "element_id": "cb7b177025447a197e5f95166eeb0282", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "tgl", - "som" - ] - }, - "text": "Dagaare, Southern Nengsaala zaa ba nang dɔge so la o menga, ka o ne o taaba zaa sengtaa noba emmo ane yɛlɛsoobo sobic poɔ. Ba dɔgɛɛ ba zaa ne yɛng ane yɛlɛ-iruu k'a da seng ka ba erɛ yɛlɛ korɔ taa a nga yɔɔmine.", - "type": "NarrativeText" - }, - { - "element_id": "8e66c9e0bff4a344e85d8767b43fd67a", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa", - "som", - "ind" - ] - }, - "text": "Dagbani Sal' la sala. Bɛhig' be sokam sanimi, din pa la amii. Suhizɔbo be sokam sani; ka nambɔɣu beni. Suhubɔhibo mi bi lan kɔŋ yigunaadam kam sani. Dinzuɣu dimbɔŋɔ zaa wuhiya ka dama di tu kamaata ka ti zaa yu tab' hali ni ti puuni.", - "type": "NarrativeText" - }, - { - "element_id": "b90d9e9d9c05b4f6982b37bbe3c37e9f", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "sqi", - "est", - "swa" - ] - }, - "text": "Dangme Adesahi tsuo ɔ, a bɔ mɛ nɛ nɔ fɛɛ nɔ e ye e he, nɛ nɔ tsuaa nɔsɔ ngɛ odehe si himi kɛ he blɔhi a blɔ fa mi. 
A bɔ mɛ kɛ nɔ́ se kɔmi kɛ he nule juɛmi, nɛ e hia kaa nɔ fɛɛ nɔ nɛ e na nyɛmi suɔmi kɛ ha nɔ tsuaa nɔ.", - "type": "NarrativeText" - }, - { - "element_id": "334d7844545ea360de232426f24cc228", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "dan" - ] - }, - "text": "Danish Alle mennesker er født frie og lige i værdighed og rettigheder. De er udstyret med fornuft og samvittighed, og de bør handle mod hverandre i en broderskabets ånd.", - "type": "NarrativeText" - }, - { - "element_id": "12deb838666ab6083a3dba9696b9fba1", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "fas" - ] - }, - "text": "Dari تمام افراد بشر آزاد به دنیا می‌آیند و از لحاظ حیثیت و حقوق با هم برابرند. 
همه دارای عقل و وجدان هستند و باید نسبت به یکدیگر با روح برادری رفتار کنند.", - "type": "NarrativeText" - }, - { - "element_id": "3551715d069482f6ec4dba0cd2418882", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa", - "ind", - "som" - ] - }, - "text": "Dendi Aduniya kuna n gu ibuna damayo hɛi nɔ dei-dei nn daama nna n burucinitɛrɛ fɔ, n lasabu nna laakari ya nam nn mɔ huro cɛrɛ kuna nyanze tɛrɛ bɔŋɔɔ.", - "type": "NarrativeText" - }, - { - "element_id": "ac128efe598097cdb68a483b1ea1f22c", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "sqi", - "fin" - ] - }, - "text": "Dinka, Northeastern Raan thök eben aye dhëëth ka lau nhöm kua thöŋ nhiim eyithiic, kua thɛ̈kic, kua ci yëknhiethku puou, ku bik cëŋ ka ke ye mith etik.", - "type": "NarrativeText" - }, - { - "element_id": "377f3dff94511f4733f9a8fa47685f8a", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa", - "fin", - "ind" - ] - }, - "text": "Ditammari Oniti ti pɛi nɖɛ omɔũ yi kpaatri otɔu, kɛ yɛ̃ oniti ba we, o yi ɖo nnɛ fɛhɔ̃fɛ; o mɔkɛmu mɛcii kɛhã mɛyɛmmɛ. 
Ti tú nɛ ɖo kenyari ti tɔbɛ mbɛ kɛ yie mii ba nkwuɔ ko otɔu ɖau.", - "type": "NarrativeText" - }, - { - "element_id": "5dba7623f7ce02476ce5807c3cc7c5f4", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swe", - "eng" - ] - }, - "text": "Drung Avzangf max pyvccuf byv syvnax zyxyyv ef, lifxingx ningx lyangvxinx alf, taixrav angvnikxrav gwanxxix mix syv av duixdaix.", - "type": "UncategorizedText" - }, - { - "element_id": "7ae6fb6c55acb3c2df3ad8079b9d2b5f", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "nld" - ] - }, - "text": "Dutch Alle mensen worden vrij en gelijk in waardigheid en rechten geboren. 
Zij zijn begiftigd met verstand en geweten, en behoren zich jegens elkander in een geest van broederschap te gedragen.", - "type": "NarrativeText" - }, - { - "element_id": "58343bf1070d7f16553f03d984ab9241", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain" - }, - "text": "Dzongkha འགྲོ་བ་མི་ཚུ་ག་ར་དལ་དབང་གི་ཐོག་ལས་སྐྱེས་ཏེ་ཡོདཔ་ལས་ ག་ར་ལུ་བརྩི་མཐོང་དང་ཐོབ་དབང་འདྲ་མཉམ་སྦེ་ཡོད། མི་ཚུ་ག་ར་སྨྲ་ཤེས་དོན་གོ་བའི་མཚན་ཉིད་དང་ལྡནམ་ལས་ ག་ར་གིས་ལཱ་ག་ཅི་ར་འབད་རུང་ གཅིག་གིས་གཅིག་ལུ་སྤུན་ཆའི་འདུ་ཤེས་བསྐྱེད་ཐོག་ལས་ལཱ་འབད་དགོ།", - "type": "Title" - }, - { - "element_id": "e88cfa3a1dc30bd8c76487377938bb87", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "afr" - ] - }, - "text": "Edo Emwan ne agbon hia ne a biere, a bie iran noyan-egbe iran kevbe wee, umwon-mwen o ree etin hia ne o kheke iran khin. A ye ewaen kevbe ekhoe ne o maa wu iran, ne iran gha yin da egbe vbe orhion oghe eten-okpa.", - "type": "NarrativeText" - }, - { - "element_id": "4c8da40656a8284287e43685f7f4447c", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] - }, - "text": "English All human beings are born free and equal in dignity and rights. 
They are endowed with reason and conscience and should act towards one another in a spirit of brotherhood.", - "type": "NarrativeText" - }, - { - "element_id": "7d5794631564e8ff8a2bf245087903a4", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa", - "fin", - "spa", - "hrv" - ] - }, - "text": "Ese Ejja Ojjaña esejja ojjaña oyaja yojjaya cuayani quiapame oyajayojjaya quiapame ojjaña eseya quiapame quia tai jjashauabataiquiani ecueya epejji jayo jjaya ojjaña jajji ojjañajaassi eseyajayojja.", - "type": "NarrativeText" - }, - { - "element_id": "5f8fd43155bbf931b71069f21ba6a609", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "slv", - "spa" - ] - }, - "text": "Esperanto Ĉiuj homoj estas denaske liberaj kaj egalaj laŭ digno kaj rajtoj. Ili posedas racion kaj konsciencon, kaj devus konduti unu al alia en spirito de frateco.", - "type": "NarrativeText" - }, - { - "element_id": "e59c6075ee4dbde4faa66c2bdc180029", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "est" - ] - }, - "text": "Estonian Kõik inimesed sünnivad vabadena ja võrdsetena oma väärikuselt ja õigustelt. 
Neile on antud mõistus ja südametunnistus ja nende suhtumist üksteisesse peab kandma vendluse vaim.", - "type": "NarrativeText" - }, - { - "element_id": "699838930374f69143263bd99d88883e", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "rus" - ] - }, - "text": "Even Бэйил бокэтчур омэн хилкич нян урумкэр балдаритно, теми ноҥардук эгдьэн ҥи‐да ачча. Бэйил бөкэтчур мэн долан акагчимур биннэтын.", - "type": "NarrativeText" - }, - { - "element_id": "8164afd787069e69d3a6bed633cfdb21", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "rus" - ] - }, - "text": "Evenki Упкат илэл ты̄нмукирди, урэ̄лди мэ̄нңи са̄рича̄ди балдыдяра. Нуңартын дялитви, һалдяндыви биси, мэмэгӣлвэр аяралды̄дяна тэдет о̄мамачитын.", - "type": "NarrativeText" - }, - { - "element_id": "8ba9631d337f32fb2b5a0049718f7162", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "pol" - ] - }, - "text": "Éwé Wodzi amegbetɔwo katã ablɔɖeviwoe eye wodzena bubu kple gomekpɔkpɔ sɔsɔe. 
Susu kple dzitsinya le wo dometɔ ɖesiaɖe si eyata wodze be woanɔ anyi le ɖekawɔwɔ blibo me.", - "type": "NarrativeText" - }, - { - "element_id": "4dad8f50be71b880b8d1cd3aa2083177", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa", - "pol" - ] - }, - "text": "Fante Wɔwo adasa nyina to fahodzi mu, na hɔn nyina yɛ pɛr wɔ enyimnyam na ndzinoa mu. Wɔmaa hɔn nyina adwen na tsibowa, na ɔwɔ dɛ hɔn nkitahodzi mu ndzeyɛɛ da no edzi dɛ wɔyɛ enuanom.", - "type": "NarrativeText" - }, - { - "element_id": "f8e68d4590ad494f5d3039e113c1ac46", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "nor" - ] - }, - "text": "Faroese Øll menniskju eru fødd fræls og jøvn til virðingar og mannarættindi. Tey hava skil og samvitsku og eiga at fara hvørt um annað í bróðuranda.", - "type": "NarrativeText" - }, - { - "element_id": "2f3af719eba5f3392f87df0894e56c42", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "fas" - ] - }, - "text": "Farsi, Western تمام افراد بشر آزاد بدنیا میایند و از لحاظ حیثیت و حقوق با هم برابرند. 
همه دارای عقل و وجدان میباشند و باید نسبت بیکدیگر با روح برادری رفتار کنند.", - "type": "NarrativeText" - }, - { - "element_id": "c031a4bdd55bf8f8599aaeda8cbe0d56", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "hrv", - "ind" - ] - }, - "text": "Fijian Era sucu ena galala na tamata yadua, era tautauvata ena nodra dokai kei na nodra dodonu. E tiko na nodra vakasama kei na nodra lewaeloma, sa dodonu mera veidokadokai ena yalo ni veitacini.", - "type": "NarrativeText" - }, - { - "element_id": "b70785870cc673f7dcbb24c8464d43fc", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "fin" - ] - }, - "text": "Finnish Kaikki ihmiset syntyvät vapaina ja tasavertaisina arvoltaan ja oikeuksiltaan. Heille on annettu järki ja omatunto, ja heidän on toimittava toisiaan kohtaan veljeyden hengessä.", - "type": "NarrativeText" - }, - { - "element_id": "ecc193afbaf5bf317c868860f5dfc5ec", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "fin" - ] - }, - "text": "Finnish, Kven Kaikki ihmiset synnythään vaphaina, ja heilä kaikila oon sama ihmisarvo ja samat ihmisoikkeuet. 
Het oon saanheet järjen ja omatunnon, ja het piethään elläät toinen toisen kans niin ko veljet keskenhään.", - "type": "NarrativeText" - }, - { - "element_id": "e2a252e076d508cd7e312c25eaf70331", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa", - "tgl" - ] - }, - "text": "Fon Acɛ, susu kpo sisi ɖokpo ɔ kpo wɛ gbɛtɔ bi ɖo ɖò gbɛwiwa tɔn hwenu; ye ɖo linkpɔn bɔ ayi yetɔn mɛ kpe lo bɔ ye ɖo na do alɔ yeɖee ɖi nɔvinɔvi ɖɔhun.", - "type": "NarrativeText" - }, - { - "element_id": "d26195c0225bad321fc98f526b1fb27b", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "fra" - ] - }, - "text": "French Tous les êtres humains naissent libres et égaux en dignité et en droits. Ils sont doués de raison et de conscience et doivent agir les uns envers les autres dans un esprit de fraternité.", - "type": "NarrativeText" - }, - { - "element_id": "f5ce0eb3d199445ab33436a396fca8cb", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "nld", - "afr" - ] - }, - "text": "Frisian, Western Alle minsken wurde frij en gelyk yn weardigens en rjochten berne. 
Hja hawwe ferstân en gewisse meikrigen en hearre har foar inoar oer yn in geast fan bruorskip te hâlden en te dragen.", - "type": "NarrativeText" - }, - { - "element_id": "0da991393fa9f40d78c4143c3a25b02a", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ita" - ] - }, - "text": "Friulian Ducj i oms a nassin libars e compagns come dignitât e derits. A an sintiment e cussience e bisugne che si tratin un culaltri come fradis.", - "type": "NarrativeText" - }, - { - "element_id": "216db5a1011f211d9206a47a9e0e4839", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "est", - "dan" - ] - }, - "text": "Fulfulde, Nigerian Innama aadeeji fof poti, ndimɗidi e jibinannde to bannge hakkeeji. Eɓe ngoodi miijo e hakkilantaagal ete eɓe poti huufo ndirde e nder ɓ iynguyummaagu.", - "type": "NarrativeText" - }, - { - "element_id": "d245ad5ed3e4ee8727b8152745ffdba6", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "som", - "tur", - "spa" - ] - }, - "text": "Fulfulde, Nigerian (2) Ɓi-aadama fuu dimo danyete/jibinte o fotan be koomoye e neɗɗaaku be hakkeeji. 
ɓe ndokkaaɓe hakkiilo ngaandi nden bo ɓe kuutindiray hakkunde maɓɓe nder yiɗyiɗɗirki mbandiraagu.", - "type": "NarrativeText" - }, - { - "element_id": "71e526a7453aa9c044c6f695d1fe4c78", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "hun" - ] - }, - "text": "Fur kwa-sí nyéttiŋ baajtólá kereli nás nisila na ta̱gɨdɨŋ arrá ka̱ɨŋ, Naŋ-sí ugola na kilmaŋá arrá ka̱ɨŋ namá in lóŋ áláŋ sǔrŋâ-sí kí jaiŋa in kéél ná sǔrŋâ suurꞌíŋ bârŋa.", - "type": "NarrativeText" - }, - { - "element_id": "dfd804850bd4d6daab5db7227283c3ab", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa", - "som" - ] - }, - "text": "Ga Afɔ gbɔmɔ fɛɛ gbɔmɔ yɛ agbojee mli, kɛ hegbɛ ko ni damɔ ŋɛlɛ koome nɔ. Gbɔmɛi fɛɛ yɛ jwɛŋmɔ kɛ henilee, ni no hewɔ lɛ esa akɛ amɛhe ahi shi yɛ nyɛmi suɔmɔ mli.", - "type": "NarrativeText" - }, - { - "element_id": "38140682ca9cf0b5c7f1cf203b331589", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "eng", - "cat" - ] - }, - "text": "Gaelic, Irish Saoláitear na daoine uile saor agus comhionann ina ndínit agus ina gcearta. 
Tá bauidh an réasúin agus an choinsiasa acu agus dlíd iad féin d'iompar de mheon bhrthreachais i leith a chéile.", - "type": "NarrativeText" - }, - { - "element_id": "c74c5c12c1d20c63c0512bda5ec488ee", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] - }, - "text": "Gaelic, Scottish Tha gach uile dhuine air a bhreth saor agus co-ionnan ann an urram 's ann an còirichean. Tha iad air am breth le reusan is le cogais agus mar sin bu chòir dhaibh a bhith beò nam measg fhein ann an spiorad bràthaireil,", - "type": "NarrativeText" - }, - { - "element_id": "adb7eafcda17469d6dffe53ac281b9e7", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "tur" - ] - }, - "text": "Gagauz Insannar hepsi duuêrlar serbest hem birtakım kendi kıymetindä hem haklarında. 
Onnara verilmiş akıl hem üz da läazım biri-birinä davransınnar kardaşlık ruhuna uygun.", - "type": "NarrativeText" - }, - { - "element_id": "d838922d035c343059a70e88f83100af", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "spa" - ] - }, - "text": "Galician Tódolos seres humanos nacen libres e iguais en dignidade e dereitos e, dotados como están de razón e conciencia, díbense comportar fraternalmente uns cos outros.", - "type": "NarrativeText" - }, - { - "element_id": "505a5c77309f7753fe016e50776647b7", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa", - "tgl", - "ind" - ] - }, - "text": "Ganda Abantu bazaalibwa nga balina eddembe n'obuyinza ebyenkanankana, batondebwa nga balina amagezi era nga basobola okwawula ekirungi n'ekibi bwebatyo, buli omu agwana okuyisa munne nga muganda we.", - "type": "NarrativeText" - }, - { - "element_id": "ec7ace2c582cd24ef64d447f5e1e7a08", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ind" - ] - }, - "text": "Garifuna Sun gürigia nasíruati yuti lun, lidan úarani, lawiwanduní libágari kai le aubai labúsienra, gatu giñe lanagun lungua buidu hadan líbegu.", - "type": "UncategorizedText" - }, - { - "element_id": "3db8c991f134adb8e84617cd84e56d43", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa", - "pol" - ] - }, - "text": "Gen Agbetɔwo kpata le jijimɛa, ɖo vosinɔnɔ, nyi gbèsɔɛ́mɛ́wó le nujɔnunnyi ku goɖoejisewo, amɛbusewo mɛ. Tagbɔ le woa si, eye wɔnawo sɔdoda woanɔnɔwo gbɔa la nyi nɔ́visilélé.", - "type": "NarrativeText" - }, - { - "element_id": "cb7127a24ce99f60f18c47121fcbe3cb", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "est" - ] - }, - "text": "Georgian ყოველი ადამიანი იბადება თავისუფალი და თანასწორი თავისი ღირსებითა და უფლებებით. მათ მინიჭებული აქვთ გონება და სინდისი და ერთმანეთის მიმართ უნდა ექცეოდნენ ძმობის სულისკვეთებით.", - "type": "NarrativeText" - }, - { - "element_id": "60e95060440c3ac89b53764c839a9658", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "deu" - ] - }, - "text": "German, Standard (1901) Alle Menschen sind frei und gleich an Würde und Rechten geboren. 
Sie sind mit Vernunft und Gewissen begabt und sollen einander im Geist der Brüderlichkeit begegnen.", - "type": "NarrativeText" - }, - { - "element_id": "d9454188531f323f4587d2668a35dce4", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "deu" - ] - }, - "text": "German, Standard (1996) Alle Menschen sind frei und gleich an Würde und Rechten geboren. Sie sind mit Vernunft und Gewissen begabt und sollen einander im Geist der Brüderlichkeit begegnen.", - "type": "NarrativeText" - }, - { - "element_id": "82bf90db0534cabdc2efe2971f9bb4c6", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "bul", - "rus" - ] - }, - "text": "Gilyak Сик нивгун куғытӻарта, пʼинамад яймта адяй правоғир̌ пʼӊафқ-ӊафқғир̌ салӻата ӿат пантадғун.", - "type": "Title" - }, - { - "element_id": "d61fdd2d22e77149dff43d70d62d722f", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa", - "ind", - "slv" - ] - }, - "text": "Gonja Bu kurwe dimedi kikɛ mobe kumu so, nɛ mobe, eyilikpa, kesheŋ nɛ kashinteŋ maŋ kɔr eko peyɛ to. 
Nyinpela sa dimedi kikɛ lakal nɛ mfɛra fanɛ bu chena abarso kelepo so.", - "type": "NarrativeText" - }, - { - "element_id": "0361867eb371916c85e13fcc3dde7f4b", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ell" - ] - }, - "text": "Greek (monotonic) Όλοι οι άνθρωποι γεννιούνται ελεύθεροι και ίσοι στην αξιοπρέπεια και τα δικαιώματα. Είναι προικισμένοι με λογική και συνείδηση, και οφείλουν να συμπεριφέρονται μεταξύ τους με πνεύμα αδελφοσύνης.", - "type": "NarrativeText" - }, - { - "element_id": "ef30df67b6cbf4e05af379e61e529561", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ell" - ] - }, - "text": "Greek (polytonic) Ὅλοι οἱ ἄνθρωποι γεννιοῦνται ἐλεύθεροι καὶ ἴσοι στὴν ἀξιοπρέπεια καὶ τὰ δικαιώματα. 
Εἶναι προικισμένοι μὲ λογικὴ καὶ συνείδηση, καὶ ὀφείλουν νὰ συμπεριφέρονται μεταξύ τους μὲ πνεῦμα ἀδελφοσύνης.", - "type": "NarrativeText" - }, - { - "element_id": "a8aaedf9144ce4af4a672873d93945c2", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "slk", - "por", - "spa", - "ind" - ] - }, - "text": "Guaraní, Paraguayan Mayma yvypóra ou ko yvy ári iñapytlʼyre ha eteĩcha dignidad ha derecho jeguerekópe; ha ikatu rupi oikuaa añetéva ha añeteʼyva, iporãva ha ivaíva, tekotevẽ pehenguéicha oiko oñondivekuéra.", - "type": "NarrativeText" - }, - { - "element_id": "1a8dccbb2225da58c6c32c944346a88f", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ind", - "swa" - ] - }, - "text": "Guarayu Opakatu ava yoro’a nda’ei tembigwaigwa oyoyatupri, sekotupri, vaëra, imboeteisara, oikatu ipi’a yemoñeta, imbaekua, ndiyai yurekorairai ñepëi pëi ambua rese.", - "type": "NarrativeText" - }, - { - "element_id": "2aff799c80d0ba06e344f3b917c6aa5a", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "guj" - ] - }, - "text": "Gujarati પ્રતિષ્ઠા અને અધિકારોની દૃષ્ટિએ સર્વ માનવો જન્મથી સ્વતંત્ર અને સમાન હોય છે. 
તેમનામાં વિચારશક્તિ અને અંતઃકરણ હોય છે અને તેમણે પરસ્પર બંધુત્વની ભાવનાથી વર્તવું જોઇએ.", - "type": "NarrativeText" - }, - { - "element_id": "7c7879f1335e2e8f7c0ca4a80cb6d9fc", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "som" - ] - }, - "text": "Gumuz Dubꞌaga bꞌaga metaam metaam alamaam kamaanzaakꞌoma kasꞌe bipokꞌoga kamadꞌab maafucꞌakꞌwa haaga bacꞌaga tso. Kábꞌaga jajanda kwa jala etigafalagash maꞌiiya nago metaagwa eyaal yida-eba bicꞌaga tso.", - "type": "NarrativeText" - }, - { - "element_id": "c591dbcd933d69898871c75fc9b2c5b8", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "fra" - ] - }, - "text": "Haitian Creole French (Kreyol) Tout moun fèt lib, egal ego pou diyite kou wè dwa. Nou gen la rezon ak la konsyans epi nou fèt pou nou aji youn ak lot ak yon lespri fwatènite.", - "type": "NarrativeText" - }, - { - "element_id": "1caef318c81d61c240de817182b5b56b", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "fra", - "eng" - ] - }, - "text": "Haitian Creole French (Popular) Tout moun sou tè a fèt tou lib. Tout gen menm valè (nan je lasosyete), tout moun gen menm dwa devan Lalwa. 
Tout moun fèt ak yon bonsans, tout fèt ak yon konsyans epi youn fèt pou trete lòt tankou frè ak sè.", - "type": "NarrativeText" - }, - { - "element_id": "ac23b37d7fc3617b806d164fb38da99e", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "cat", - "som", - "est" - ] - }, - "text": "Hani Aqsol liq yoqdeivq yoqpyuq bo, meeqyaovq ssolnei colpyuq qiq kov dei. Davqtavcolssaq neenyuq bel neema meeq ya siq, laongaoq meilnaol nadul meil e gaq ssol hhyul hha bavqduv nia.", - "type": "NarrativeText" - }, - { - "element_id": "100bdd3a0bc9a25394f34018b95871fe", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ind", - "swa" - ] - }, - "text": "Hausa Duk ‘yan’adan ana haihuwarsu ne a matsayin ‘yantattun ‘ya’ya, kuma mutuncinsu da haqqoqinsu daidai yake da na kowa. Suna da tunani da cikakken hankali, saboda haka ake son duk mu’amalar da za su yi, ta kasance akwai ‘yan’uwantaka a tsakani.", - "type": "NarrativeText" - }, - { - "element_id": "19ff46e13339eab9d9fce6566dad6102", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa", - "ind" - ] - }, - "text": "Hausa (Niger) Su dai ƴan‐adam, ana haifuwarsu ne duka ƴantattu, kuma kowannensu na da mutunci da hakkoki daidai da na kowa. 
Suna da hankali da tunani, saboda haka duk abin da za su aikata wa juna, ya kamata su yi shi a cikin ƴan‐uwanci.", - "type": "NarrativeText" - }, - { - "element_id": "39fce89f870171ba68c60c4aaaeb5509", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ind", - "swa" - ] - }, - "text": "Hausa (Nigeria) Su dai ‘yan-adam, ana haifuwarsu ne duka ‘yantattu, kuma kowannensu na da mutunci da hakkoki daidai da na kowa. Suna da hankali da tunani, saboda haka duk abin da za su aikata wa juna, ya kamata su yi shi a cikin ‘yan-uwanci.", - "type": "NarrativeText" - }, - { - "element_id": "5a888adab3cc776c69ebb4b588db4bfb", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa", - "lav" - ] - }, - "text": "Hawaiian Hānau kū’oko’a ‘ia nā kānaka apau loa, a ua kau like ka hanohano a me nā pono kīvila ma luna o kākou pākahi. Ua ku’u mai ka no’ono’o pono a me ka ‘ike pono ma luna o kākou, no laila, e aloha kākou kekahi i kekahi.", - "type": "NarrativeText" - }, - { - "element_id": "9bce25b61dc4faf00ebf9ae5bedd19aa", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "heb" - ] - }, - "text": "Hebrew כל בני אדם נולדו בני חורין ושווים בערכם ובזכויותיהם. 
כולם חוננו בתבונה ובמצפון, לפיכך חובה עליהם לנהוג איש ברעהו ברוח של אחוה.", - "type": "NarrativeText" - }, - { - "element_id": "c47d3d0f8955a5c0769b982de1bcab85", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "tgl" - ] - }, - "text": "Hiligaynon Ang tanan nga tao ginbun-ag nga hilway kag may pag-alalangay sa dungog kag katarungan. Sila ginhatagan sang pagpamat-od kag konsensya kag nagakadapat nga magbinuligay sa kahulugan sang pag-inuturay.", - "type": "NarrativeText" - }, - { - "element_id": "8af5d2f7586f72942fcfc21e4f9f0e7e", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "hin" - ] - }, - "text": "Hindi सभी मनुष्यों को गौरव और अधिकारों के मामले में जन्मजात स्वतन्त्रता और समानता प्राप्त है । उन्हें बुद्धि और अन्तरात्मा की देन प्राप्त है और परस्पर उन्हें भाईचारे के भाव से बर्ताव करना चाहिए ।", - "type": "UncategorizedText" - }, - { - "element_id": "b992780a7e7cfec805b61d50bd3cbb25", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "est", - "ind", - "som" - ] - }, - "text": "Hindustani, Sarnami Sab djanne aadjádi aur barabar paidaa bhailèn, iddjat aur hak mê. 
Ohi djanne ke lage sab ke samadj-boedj aur hierdaai hai aur doesare se sab soemmat sè, djaane-maane ke chaahin.", - "type": "NarrativeText" - }, - { - "element_id": "d4294c077d745315e3700a34906a1a37", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "cat", - "nld", - "fra" - ] - }, - "text": "Hmong, Northern Qiandong Laix laix diangl dangt lol sob dab yangx ghax maix zit yef, niangb diot gid zenb nieef haib gid quaif lit gid nongd jus diel pinf denx. Nenx dol maix laib lix xent haib jox hvib vut, nenx dol nongt liek bed ut id xit deit dait.", - "type": "NarrativeText" - }, - { - "element_id": "d5c28cb0359d5c0d40ae0645be22c03b", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "tgl", - "eng" - ] - }, - "text": "Hmong, Southern Qiandong Leb leb nis zib youl nangs, mex ad sheit nangd zend yanl nhangs njanl lib. Mix mex lix xinb gaot liangt send, leb leb lies nhangs ghob nab ghob geud nangd.", - "type": "NarrativeText" - }, - { - "element_id": "3ae5d0023d02b871b6b7567348fbd99b", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "som", - "ind" - ] - }, - "text": "Hmong Njua Cuat lenx cuat dol bongb deul ndax dex douf muax zif youx, nyaob shout zunb yinx tab ndas dos id, dax zis ib suk. 
Nil buab daf lol jaox muax lid xinf hlub hout tab liangx xinb shab nzhuk, yinf gaib keuk suk gud dix mol lol nit jinb shenx lol shib daf shib hlad.", - "type": "NarrativeText" - }, - { - "element_id": "4113619dd86b7bf65f70dd31f3155ce1", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ind", - "swa", - "hun" - ] - }, - "text": "Huastec (San Luís Potosí) Patal an inik ani an uxum u wa'tsinal walkadh abal jununúl kin bats'uw an alwa'taláb ani ka pidhan in éy jant'ini' in tomnál; in kwa'al in tsalpádh ani in k'ayá' abal kin k'anidha' in juntal.", - "type": "NarrativeText" - }, - { - "element_id": "cec56f0f701b47b7615015993ec87eaa", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ind", - "cat", - "ces" - ] - }, - "text": "Huastec (Sierra de Otontepec) Kuentsal nap wah-chínal tee ti chabal jayechek-i antip wah-chínal, bá tamá maxak a pulik maxak in exlal, jununul aní ni chap aní jaxtam ko-yal kip le-naxín aní ki k-ana ti ba.", - "type": "NarrativeText" - }, - { - "element_id": "6f91b5cb130f52af680e4b1b597c984a", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa", - "est", - "som", - "ind" - ] - }, - "text": "Huastec (Veracruz) Ejtal an kw'ajiiltsik u wa'chinal kweteem ani chu'udh k'al an chu'uxtalaab ani yajat ka k'aak'naaxin juun ani juun.", - "type": "NarrativeText" - }, - { - "element_id": 
"68c1e44b4d3af66e1c5cddb5a8861a91", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ita", - "por" - ] - }, - "text": "Huitoto, Murui Nana caɨ comuillamona dama caɨ abido itɨcaɨ. Caɨ comuillamona jɨaɨmɨe anamo iñedɨcaɨ. Nana daje facaiconi itɨcaɨ. Abɨ uiñuanona comuidɨcaɨ. Danɨ coninɨrie caɨ nabairilla.", - "type": "NarrativeText" - }, - { - "element_id": "35c2ba2ee3067a7d3d5509a2f11f8123", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "hun" - ] - }, - "text": "Hungarian Minden. emberi lény szabadon születik és egyenlő méltósága és joga van. Az emberek, ésszel és lelkiismerettel bírván, egymással szemben testvéri szellemben kell hogy viseltessenek.", - "type": "NarrativeText" - }, - { - "element_id": "57d454640d5878f3ce695e2f10449346", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ind", - "swa", - "tur" - ] - }, - "text": "Ibibio Kpukpuru owo emana nte amanison, enyun enyene ukem ukem uku ye unen. 
Eyoho mmo ye ukeme ndikere nkpo, ndinyun nyene esit, ke ntre, mmo enyene ndiman nkpo mbana kiet eken ke esit ndito eka.", - "type": "NarrativeText" - }, - { - "element_id": "d1120c74094e3c70d2191f6d40987753", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "nor" - ] - }, - "text": "Icelandic Hver maður er borinn frjáls og jafn öðrum að virðingu og réttindum. Menn eru gæddir vitsmunum og samvizku, og ber þeim að breyta bróðurlega hverjum við annan.", - "type": "NarrativeText" - }, - { - "element_id": "7a90d2a44053e814d2d0cdd9e816e459", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ita", - "spa" - ] - }, - "text": "Ido Omna homi naskas libera ed egala relate digneso e yuri. Li es dotita per raciono e koncienco e devas agar vers l'una l'altra en spirito di frateso.", - "type": "NarrativeText" - }, - { - "element_id": "c061731c2409f1d04154bcb99040df32", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa" - ] - }, - "text": "Idoma Ęgę ni modudu acę kęcę nya bęcę ęhehi aa ,hibi ęgͻ ma acę duu jonjilę ipu kocęgba nͻcę cęgba męml’ojonjilę ipu ͻdah ni yabͻ ͻcę nya. 
Odudu acę kwu ђwule ml’ohili otu męml’ocai kęla jͻcę ͻha ni yipu ͻtu ͻcę aa, higbͻ ma ͻcę higbo yͻda męml’ ͻmpa gunu lę bͻinę nu ma.", - "type": "NarrativeText" - }, - { - "element_id": "c3dc3590b2338d3585c67664e25eb878", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa" - ] - }, - "text": "Igbo A mụrụ mmadụ nile n'ohere nakwa nha anya ugwu na ikike. E nyere ha uche na mmụọ ime ihe ziri ezi nke na ha kwesiri ịkpaso ibe ha agwa n'obi nwanne na nwanne.", - "type": "NarrativeText" - }, - { - "element_id": "050a0685e37c5cdf1484af7fb81846c0", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa", - "slv", - "hrv", - "ind" - ] - }, - "text": "Ijo, Southeast Kim’ owoumo se, keni bara ki na, pa zimi, ose keni bara kemi. Kim’se ye iroro, mani ikiou nana, enini kim’se dudu tari teme nana weri iyenri.", - "type": "NarrativeText" - }, - { - "element_id": "ea67730b04f48bb8c5b63709621f2034", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "tgl" - ] - }, - "text": "Ilocano Amin nga tao nga sibibiag ket naiyanak a siwawayawaya ken addaan iti agpapada nga dayaw ken kalintegan. 
Naikkanda ti panagikalintegan ken konsensya a nasken ti panagtitinnulong iti meysa ken meysa iti espiritu nga nainkak-absatan.", - "type": "NarrativeText" - }, - { - "element_id": "7cec6b86e84db86bb3df3b8e636075e3", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ind" - ] - }, - "text": "Indonesian Semua orang dilahirkan merdeka dan mempunyai martabat dan hak-hak yang sama. Mereka dikaruniai akal dan hati nurani dan hendaknya bergaul satu sama lain dalam semangat persaudaraan.", - "type": "NarrativeText" - }, - { - "element_id": "e2a669a4be13da0177954c07c8ca0014", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ita", - "fra" - ] - }, - "text": "Interlingua Tote le esseres human nasce libere e equal in dignitate e in derectos. Illes es dotate de ration e de conscientia e debe ager le unes verso le alteres in un spirito de fraternitate.", - "type": "NarrativeText" - }, - { - "element_id": "c08152bc9c1cbc1930714b7051e6100a", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain" - }, - "text": "Inuktitut, Eastern Canadian ᐃᓅᔪᓕᒫᑦ ᐊᓂᖅᑎᕆᔪᓕᒫᑦ ᐃᓅᓚᐅᕐᒪᑕ ᐃᓱᒪᕐᓱᕐᖢᑎᒃ ᐊᒻᒪᓗ ᐊᔾᔨᐅᖃᑎᒌᒃᖢᑎᒃ ᓂᕐᓱᐊᖑᓂᒃᑯᑦ ᐊᒻᒪᓗ ᐱᔪᓐᓇᐃᑎᑎᒍᑦ. 
ᐃᓱᖃᖅᑐᖁᑎᖃᕐᑎᑕᐅᕙᓕᕐᐳᑦ ᐱᔾᔪᑎᖃᕐᓂᒃᑯᑦ ᖃᑕᙳᑎᒌᑦᑎᐊᕆᐊᖃᕐᓂᒃᑯᓪᓗ.", - "type": "NarrativeText" - }, - { - "element_id": "85b8bd3d1031ba2138519e0c94a05535", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "est" - ] - }, - "text": "Inuktitut, Greenlandic Inuit tamarmik inunngorput nammineersinnaassuseqarlutik assigiimmillu ataqqinassuseqarlutillu pisinnaatitaaffeqarlutik. Solaqassusermik tarnillu nalunngissusianik pilersugaapput, imminnullu iliorfigeqatigiittariaqaraluarput qatanngutigiittut peqatigiinnerup anersaavani.", - "type": "NarrativeText" - }, - { - "element_id": "6e8030f949832ac1e4d5632bc1a06b48", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ita" - ] - }, - "text": "Italian Tutti gli esseri umani nascono liberi ed eguali in dignità e diritti. 
Essi sono dotati di ragione e di coscienza e devono agire gli uni verso gli altri in spirito di fratellanza.", - "type": "NarrativeText" - }, - { - "element_id": "ec65722b37347cefd9069c89a8e75791", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] - }, - "text": "Japanese", - "type": "Title" - }, - { - "element_id": "57bbff46bb89b26b933206afe0fd8904", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "jpn" - ] - }, - "text": "すべての人間は、生まれながらにして自由であり、かつ、尊厳と権利とについて平等である。人間は、理性と良心とを授けられており、互いに同胞の精神をもって行動しなければならない。", - "type": "Title" - }, - { - "element_id": "5e6d21f4f6cf7b92b7fdaecf265580aa", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] - }, - "text": "Japanese (Osaka)", - "type": "Title" - }, - { - "element_id": "11becf872133958b85928710255eb2cc", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "jpn" - ] - }, - "text": "すべての人間は、生まれながらにして自由やし、かつ、尊厳と権利とについて平等や。人間は、理性と良心とを授けられており、互いに同胞の精神をもって行動しな。", - "type": "Title" - }, - { - "element_id": "673e122c097796c5aa83f02476e37529", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 
33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] - }, - "text": "Japanese (Tokyo)", - "type": "Title" - }, - { - "element_id": "491550640c5496ae9b9e41b4c6cc14f0", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "jpn" - ] - }, - "text": "全部の人間は、生まれながらにして自由であり、かつ、尊厳と権利と について平等である。人間は、理性と良心とを授けられており、互いに同 胞の精神をもって行動しなければならない。", - "type": "Title" - }, - { - "element_id": "d1c8d98009aff8c745beed6b2d4c44f3", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] - }, - "text": "Javanese (Javanese)", - "type": "Title" - }, - { - "element_id": "36abfab21253834165ada6ce4b89b5e6", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain" - }, - "text": "꧋ꦱꦧꦼꦤ꧀ꦲꦸꦮꦺꦴꦁꦏꦭꦲꦶꦂꦫꦏꦺꦏꦤ꧀ꦛꦶꦩꦂꦢꦶꦏꦭꦤ꧀ꦢꦂꦧꦺꦩꦂꦠꦧꦠ꧀ꦭꦤ꧀ꦲꦏ꧀ꦲꦏ꧀ꦏꦁꦥꦝ꧉​ꦏꦧꦺꦃꦥꦶꦤꦫꦶꦁꦔꦤ꧀ꦲꦏꦭ꧀ꦭꦤ꧀ꦏꦭ꧀ꦧꦸꦱꦂꦠꦏꦲꦗꦧ꧀ꦥꦱꦿꦮꦸꦁꦔꦤ꧀ꦲꦁꦒꦺꦴꦤ꧀ꦤꦺꦩꦼꦩꦶꦠꦿꦤ꧀ꦱꦶꦗꦶꦭꦤ꧀ꦱꦶꦗꦶꦤꦺꦏꦤ꧀ꦛꦶꦗꦶꦮꦺꦴꦱꦸꦩꦢꦸꦭꦸꦂ꧉​", - "type": "Title" - }, - { - "element_id": "117934bb8b775293442e8ca3921ad1da", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ind" - ] - }, 
- "text": "Javanese (Latin) Saben uwong kalairake kanthi mardika lan darbe martabat lan hak-hak kang padha. Kabeh pinaringan akal lan kalbu sarta kaajab pasrawungan anggone memitran siji lan sijine kanthi jiwo sumadulur.", - "type": "NarrativeText" - }, - { - "element_id": "6b54f0a53f2c7bb4545835a761d4654b", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ind", - "swa" - ] - }, - "text": "Jola-Fonyi Bukanak búrom nan kuwolimi kurere kererer di waafaw búrom. Kubabaj poop búyejet di karampenoor.", - "type": "NarrativeText" - }, - { - "element_id": "b2c33dfdb2855a8786e1145a6dbbedc2", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "hun", - "vie" - ] - }, - "text": "Jula Wólo’ lá, hádamaden’ bɛɛ ye hɔrɔn ye, bɛɛ ká kán lànbe ní hákɛyaw lá. Mɔgɔ bɛɛ ye hákilitigi ye, bɛɛ ye hákilima ye ; ò là, ù ká kán kà ɲgɔn mína ní bádenya ye.", - "type": "NarrativeText" - }, - { - "element_id": "78522b71b29080a2ab8b60b4f8bdb929", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ind", - "hrv", - "swa" - ] - }, - "text": "K'iche', Central Konojel ri winaq are taq ke'alaxik pa junaman ya'tal chkech kakechab'ej ronojel ri utzil; utz kakib'ano, kakichomaj, kakib'ij jasa je' ri k'o pa kanima, rumal che ri junam kib'antajik. 
Rajawaxik xuqe' kakimulij kib' che utzukuxuk ri loq'ob'al pa we uwachulew.", - "type": "NarrativeText" - }, - { - "element_id": "03b0bbddb1137224b43b690dfcc5b506", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "rus" - ] - }, - "text": "Kabardian Цӏыху псори щхьэхуиту, я щӏыхьымрэ я хуэфащэхэмрэкӏэ зэхуэдэу къалъхур. Акъылрэ зэхэщӏыкӏ гъуазэрэ яӏэщи, зыр зым зэкъуэш зэхащІэ яку дэлъу зэхущытын хуейхэщ.", - "type": "NarrativeText" - }, - { - "element_id": "5da5e2f597a0e6fce26a5359c72395b3", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "tgl" - ] - }, - "text": "Kabiyé Palʊlʊʊ ɛyaaa nɛ pa-tɩ yɔɔ wɛʊ kpaagbaa nɛ pɛwɛɛ kɩmaŋ wala ɛsɩndaa. Palʊlʊʊ-wɛ nɛ pɔ-lɔŋ nɛ pa-maɣzɩm; mbʊ yekina nɛ pɔsɔɔlɩ ɖama se pɛkɛ ɛyaa pa-tɩŋgɛ.", - "type": "NarrativeText" - }, - { - "element_id": "b1298a59ae52d3a285db4b52acce1f32", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "por" - ] - }, - "text": "Kabuverdianu Tudo ser humano na ês mundo nacê libri e igual na sê dignidade e na sês drêto. 
Na sês razon e na sês concénça, tudo arguem debê porcêdê pa co tudo guenti na sprito di fraternidadi.", - "type": "NarrativeText" - }, - { - "element_id": "d1fe7eed38b94d986fb537125627c4f2", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "som" - ] - }, - "text": "Kafa Ubbe ashi bushoo shiijjeto tatoonaa ame megoona aalloon, oogoonaa wuroonon yechiiniye. Ikkoo baroona manittine shalligoonaa naboona yeshet shalligoon boono shaddeyoo hakkiimm qello boonoshich ichete.", - "type": "NarrativeText" - }, - { - "element_id": "f6f8a776d36f4db6ffdd50e83fee6488", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "kan" - ] - }, - "text": "Kannada ಎಲ್ಲಾ ಮಾನವರೂ ಸ್ವತಂತ್ರರಾಗಿಯೇ ಜನಿಸಿದ್ದಾರೆ. ಹಾಗೂ ಘನತೆ ಮತ್ತು ಹಕ್ಕುಗಳಲ್ಲಿ ಸಮಾನರಾಗಿದ್ದಾರೆ. ವಿವೇಕ ಮತ್ತು ಅಂತಃಕರಣಗಳನ್ನು ಪಡೆದವರಾದ್ದರಿಂದ ಅವರು ಪರಸ್ಪರ ಸಹೋದರ ಭಾವದಿಂದ ವರ್ತಿಸಬೇಕು.", - "type": "NarrativeText" - }, - { - "element_id": "2600735e35ce8a6dc8243d2269bbeee5", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa", - "ind", - "som", - "hun" - ] - }, - "text": "Kanuri, Central Adamgana woso kambe katambo ye daraja-a hakkiwa-ason kalkalye. 
Hankal-a nazaru-asoro kəzəpkə ye suro hal nəmharamiben kamazasoga letaiyin ye.", - "type": "NarrativeText" - }, - { - "element_id": "a841ec547609322347b08be60cc1c722", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa", - "ind", - "tgl" - ] - }, - "text": "Kaonde Bonse bantu basemwa bakasuluka kabiji baesakena pamo mubuneme. Baji na maana a kulanguluka kabiji bobila bantu bakwabo byubilo bakwibasekesha.", - "type": "NarrativeText" - }, - { - "element_id": "2e5fe352907c2d71abf3a0283032775f", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "slv", - "hrv", - "est" - ] - }, - "text": "Kaqchikel, Central Konojel ri winaqi' kan kalaxib'en pe ri kolotajïk, ri junan kiq'ij, ri junan kejqalen, junan kich'ojib'al pa kik'aslen, xa achi'el k'a ri kik'ojlen, ri kinojib'al kichajin xa tik'amun k'a chi nimaläj konojel xtikajo' ki'.", - "type": "NarrativeText" - }, - { - "element_id": "23d27d0652af0739dbaa674e88fc9ae4", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "rus" - ] - }, - "text": "Karakalpak Ҳәмме адамлар өз қәдир-қымбаты және ҳуқықларында еркин ҳәм тең болып туўылады. 
Оларға ақыл ҳәм ҳүждан берилген болып, бир-бирине туўысқанлық руўхындағы қатнаста болыўы тийис.", - "type": "NarrativeText" - }, - { - "element_id": "c6f580433e84639a19b178da5dc4b3a2", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "est", - "fin" - ] - }, - "text": "Karelian Kai rahvas roittahes vällinny da taza-arvozinnu omas arvos da oigevuksis. Jogahizele heis on annettu mieli da omatundo da heil vältämättäh pidäy olla keskenäh, kui vellil.", - "type": "NarrativeText" - }, - { - "element_id": "87e368f61c4a1ba6e0a5743d4d2d41b2", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "som", - "swa" - ] - }, - "text": "Kasem Ba loge nɔɔna maama se ba taa ye bedwe mo ba ŋwea de ba chega seini, ye fefeo teira kɔtaa. Wɛ pɛ ba swa de boboŋa mo se ba taa ye nubiu daane ye ba jege da ŋwaŋa.", - "type": "NarrativeText" - }, - { - "element_id": "1908a740d8aedadb521f39432a6cbed8", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ukr", - "rus" - ] - }, - "text": "Kazakh Барлық адамдар тумысынан азат және қадір‐қасиеті мен кұқықтары тең болып дүниеге келеді. 
Адамдарға ақыл‐парасат, ар‐ождан берілген, сондықтан олар бір‐бірімен туыстық, бауырмалдық қарым‐қатынас жасаулары тиіс.", - "type": "NarrativeText" - }, - { - "element_id": "75b6a6751bcdf3ddfc1745d8e7118815", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ukr", - "rus" - ] - }, - "text": "Khakas Полған на кізі пос паза тиң тӧріпче паза тиң постың синін пілінгенін паза тӧрелерініңде полча. Олардың сағынғаны паза арығ сағыс пар паза харындастар чіли тудынарға киректер.", - "type": "NarrativeText" - }, - { - "element_id": "74a93facd90bf0553bdf368698baa2a5", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ind", - "som" - ] - }, - "text": "Khasi Ïa ki bynriew baroh la kha laitluid bad ki ïaryngkat ha ka burom bad ki hok. 
Ha ki la bsiap da ka bor pyrkhat bad ka jingïatiplem bad ha ka mynsiem jingsngew shipara ki dei ban ïatrei bynrap lang.", - "type": "NarrativeText" - }, - { - "element_id": "b6ab4d5f0569e217cd985de6b9f5ca73", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain" - }, - "text": "Khmer, Central មនុស្សទាំងអស់ កើតមកមានសេរីភាព និងសមភាព ក្នុងផ្នែកសេចក្ដីថ្លៃថ្នូរនិងសិទ្ធិ។ មនុស្ស មានវិចារណញ្ញាណនិងសតិសម្បជញ្ញៈជាប់ពីកំណើត ហើយគប្បីប្រព្រឹត្ដចំពោះគ្នាទៅវិញទៅមកក្នុងស្មារតីភាតរភាពជាបងប្អូន។", - "type": "Title" - }, - { - "element_id": "841467ed91005c2b65ccce68e9bac719", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "tur" - ] - }, - "text": "Khün ᨾᨶᩩᩔ᩼ᨴ᩠ᨦᩢᩉᩖᩣ᩠ᨿᨠᩮ᩠ᨯᩨᨾᩣᨾᩦᨻ᩠ᨦᩈᩁᩓᩢᨹ᩠ᨿ᩵ᨦᨻ᩠ᨿᨦᨠ᩠ᨶᩢ ᨶᩱᨠᩥᨲ᩠ᨲᩥᩈ᩠ᨠᩢ ᩓᩢᩈᩥᨴ᩠ᨵᩥ ᨲ᩵ᩣ᩠ᨦᨣᩳ᩶ᨣᩢᨾᩦᨾᨶᩮᩣᨵᨾ᩠ᨾ᩼ᩓᩢ ᨣ᩠ᩅᩁᨷᨭᩥᨷ᩠ᨲᩢᨲᩳ᩵ᨠ᩠ᨶᩢᨯᩢ᩠ᩅ᩠ᨿᨣ᩠ᩅᩣ᩠ᨾᨹ᩠ᨿ᩵ᨦᨻ᩠ᨿᨦᨠ᩠ᨶᩢ", - "type": "Title" - }, - { - "element_id": "7abc18c11be0eb0d9f9526fbe76af972", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "rus", - "mkd" - ] - }, - "text": "Kirghiz Бардык адамдар өз беделинде жана укуктарында эркин жана тең укуктуу болуп жаралат. 
Алардын аң‐сезими менен абийири бар жана бири‐бирине бир туугандык мамилекылууга тийиш.", - "type": "NarrativeText" - }, - { - "element_id": "2490211a751af08c831f437250d70884", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "tgl", - "swa" - ] - }, - "text": "Kissi, Northern wanda tu cio Mɛ pilɔɔ o wolɔɔ ni, le waa o ba ndɔɔ cio, o bɛɛlen kenando ni, o tɔngdo ni, bɛtu nɔn yiyando a kullo, o kon ni naan tu dua mim maalyan kalapilɔyɛyi ni.", - "type": "NarrativeText" - }, - { - "element_id": "acff30c65cc8bd28c010b97e9b255653", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa", - "ind" - ] - }, - "text": "Kituba Bantu nyonso, na mbutukulu kevwandaka na kimpwanza ya bawu, ngenda mpe baluve ya mutindu mosi. Mayela na mbanzulu je na bawu, ni yawu yina bafwana kusalasana na bumpangi.", - "type": "NarrativeText" - }, - { - "element_id": "89b560fcf7e82a3a650bee70ceeb01ac", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa", - "tgl", - "ind" - ] - }, - "text": "Kituba (2) Bantu nyonso ntangu bawu ke butukaka, bawu ke vwandaka na kimpwanza, ya kele mutindu mosi mpe na yina me tadila buzitu ya nzutu mpe baluve ya bawu. Bawu kele na mayindu mpe na bumuntu. 
Mpe nyonso yina bawu fwana kusala na sika ya bantu ya nkaka, bawu fwana kusala yawu na mpeve ya kimpangui.", - "type": "NarrativeText" - }, - { - "element_id": "3da488a598903b0fa6a89a4d9b704219", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "rus" - ] - }, - "text": "Komi-Permyak Быдӧс отирыс чужӧны вольнӧйезӧн да ӧткоддезӧн достоинствоын да правоэзын. Нылӧ сетӧм мывкыд да совесть овны ӧтамӧдныскӧт кыдз воннэзлӧ.", - "type": "NarrativeText" - }, - { - "element_id": "b613757216cf998e48abdf457b38e8e5", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa", - "tgl" - ] - }, - "text": "Konjo Abandu omububuthiranwa bakabuthawa ibanawithe obuthoki nobuholho obulingirirene, mobahangikwa ibanawithe amenge, neryo ibakathoka erighabania abathya ekibuya nekisandire. Nokweryo buli muyima atholere eryanza munyikiwe ngababuthenwe.", - "type": "NarrativeText" - }, - { - "element_id": "8957157e481718ba32250db7a8d7c310", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa" - ] - }, - "text": "Koongo Bantu nyonso, na mbutukulu kevwandaka na kimpwanza ya bawu, ngenda mpe baluve ya mutindu mosi. 
Mayela na mbanzulu je na bawu, ni yawu yina bafwana kusalasana na bumpangi.", - "type": "NarrativeText" - }, - { - "element_id": "b3972bc2704b772167f5cbc75e4d4660", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa" - ] - }, - "text": "Koongo (Angola) Bizingi bioso bisiwu ti batu bambutukanga mu kidedi ki buzitu ayi kibumswa. Bizingi-bene, batu, badi diela ayi tsi-ntima, bafwene kuzingila mbatzi-na-mbatzi-yandi mu mtima bukhomba.", - "type": "NarrativeText" - }, - { - "element_id": "71cc3fa5f30f347d8e225e871139661f", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "kor" - ] - }, - "text": "Korean 모든 인간은 태어날 때부터 자유로우며 그 존엄과 권리에 있어 동등하다. 
인간은 천부적으로 이성과 양심을 부여받았으며 서로 형제애의 정신으로 행동하여야 한다.", - "type": "NarrativeText" - }, - { - "element_id": "ec837c06df9c110c22e734be4704e763", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "som", - "swa" - ] - }, - "text": "Kpelle, Guinea Nukan gele kaa pələ kaa tanɔn, yiliɓa nu kəle maawiyə pələ da tɔɔi gaa ɲei yɛnɛyii hu kɛpələ kaalɔ tanɔn; di kɛmɛni a nukan ŋaa ɓə gɛɛ hwəkɛli wɛlikɛmaa ə lɔ di luwai.", - "type": "NarrativeText" - }, - { - "element_id": "6322dea6cfe74f4e5e0272752dccffb4", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ind", - "eng", - "tgl" - ] - }, - "text": "Krio ɛvribɔdi bɔn fri ɛn gɛt in yon rayt, nɔn wan nɔ pas in kɔmpin. Wi ɔl ebul fɔ tink ɛn fɛnɔt wetin rayt ɛn rɔŋ pantap dat wi fɔ sabi aw fɔ liv lɛk wan big famili.", - "type": "NarrativeText" - }, - { - "element_id": "e4653071cb4a8a4f59ca7f62a50afbb4", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "tgl", - "som" - ] - }, - "text": "Kulango, Bouna Igooyoo pɛɛ hʋn taa. Bɔ pɛɛ jabaga bɔrɔ. 
Hɔ ya gʋʋn’n bɔɔ hɛ pɛɛ, hɔ hɛ gusɛgɛ’n.", - "type": "NarrativeText" - }, - { - "element_id": "df4b88e2493c88f7b478eaece77dfdb7", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "tur", - "nld", - "eng" - ] - }, - "text": "Kurdish, Central Hemû mirov azad û di weqar û mafan de wekhev tên dinyayê. Ew xwedî hiş û şuûr in û divê li hember hev bi zihniyeteke bratiyê bilivin.", - "type": "NarrativeText" - }, - { - "element_id": "26a7611f793432bd8ce6f6cb35470ad5", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "nld", - "tur", - "eng", - "afr" - ] - }, - "text": "Kurdish, Northern Hemû mirov azad û di weqar û mafan de wekhev tên dinyayê. Ew xwedî hiş û şuûr in û divê li hember hev bi zihniyeteke bratiyê bilivin.", - "type": "NarrativeText" - }, - { - "element_id": "0eaf9123417f2794584c7cfd20e10aee", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "spa", - "cat" - ] - }, - "text": "Ladin Dötes les porsones nasc lëdies y cun la medema dignité y i medemi dërć. 
Ares à na rajun y na cosciënza y mëss s’incuntè öna cun l’atra te n spirit de fraternité.", - "type": "NarrativeText" - }, - { - "element_id": "0e27738675f5136ec763f36ff9bb0ae2", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "spa" - ] - }, - "text": "Ladino Todos los umanos nasen libres i iguales en dinyidad i derechos i, komo estan ekipados de razon i konsensia, deven komportarsen kon ermandad los unos kon los otros.", - "type": "NarrativeText" - }, - { - "element_id": "5590b8f08d34a13d98afa307c3a0db0a", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "vie", - "pol", - "som" - ] - }, - "text": "Lamnso' Á dzə̀ə́ wir dzə̀m réŋréŋ fó ghvəm wùn à fó ghày, á yo’ dzə̀ə́ wir msòŋ ji kwàn. 
Wìr dzə̀m k̀m k fómo woo fó kwà’tì wùn à fó vifii, a wù kér fó a yiì e wùmò’ woo wír moo fə́r və.", - "type": "NarrativeText" - }, - { - "element_id": "ae451bf94c5e07470540741833822372", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain" - }, - "text": "Lao ມະນຸດເກີດມາມີສິດເສລີພາບ ແລະ ສະເໝີໜ້າກັນໃນທາງກຽດຕິສັກ ແລະ ທາງສິດດ້ວຍມະນຸດມີສະຕິສຳປັດຊັນຍະ(ຮູ້ດີຮູ້ຊົ່ວ)ແລະມີມະໂນທຳຈື່ງຕ້ອງປະພຶດຕົນຕໍ່ກັນໃນທາງພີ່ນ້ອງ.", - "type": "Title" - }, - { - "element_id": "0ed168b2d2709781e3bd28875a311e5c", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "fra" - ] - }, - "text": "Latin Omnes homines dignitate et iure liberi et pares nascuntur, rationis et conscientiae participes sunt, quibus inter se concordiae studio est agendum.", - "type": "NarrativeText" - }, - { - "element_id": "390fa005137d580229352d11d7af483d", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "fra", - "cat", - "ron" - ] - }, - "text": "Latin (1) Omnes homines liberi aequique dignitate atque juribus nascuntur. 
Ratione conscientiaque praediti sunt et alii erga alios cum fraternitate se gerere debent.", - "type": "NarrativeText" - }, - { - "element_id": "6cddab55572e83cd679bab750a745b46", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "lav" - ] - }, - "text": "Latvian Visi cilvēki piedzimst brīvi un vienlīdzīgi savā pašcieņā un tiesībās. Viņi ir apveltīti ar saprātu un sirdsapziņu, un viņiem jāizturas citam pret citu brālības garā.", - "type": "NarrativeText" - }, - { - "element_id": "84c7cce831ebebafd545d3767089bc8f", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "lav" - ] - }, - "text": "Latvian (2) Visi cilvēki piedzimst brīvi un vienlīdzīgi cieņā un tiesībās. Viņiem ir dots saprāts un sirdsapziņa, un viņiem citam pret citu jāizturas brālības garā.", - "type": "NarrativeText" - }, - { - "element_id": "c431b1dcba75dca04cdeaaa5388f19c0", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ita" - ] - }, - "text": "Ligurian Tutte e personn-e nascian libere e pæge in dignitæ e driti. 
Son dotæ de raxon e coscensa e gh’an da agî l’unn-a verso l’atra inte ’n spirito de fradelansa.", - "type": "NarrativeText" - }, - { - "element_id": "693ef7caa32675b109893e37846d9f13", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa" - ] - }, - "text": "Limba, West-Central Biya-mɛti fooma be kiyo ka kuyankaŋ iŋ kasɛmbɛ mɛnɛ in ka yiki. Bindɛ kiŋ ba niyɔ in masimɔkɔ, maka yiina wo ka hu wɛndi yande.", - "type": "NarrativeText" - }, - { - "element_id": "d2f3db4ece1ba0a2826440f4e392a66d", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa", - "tgl" - ] - }, - "text": "Lingala Bato nyonso na mbotama bazali nzomi pe bakokani na limemya pe makoki. Bazali na mayele pe basengeli kofanda na bondeko okati na bango.", - "type": "NarrativeText" - }, - { - "element_id": "6fcb989c6e738221bc467859b15c2d51", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "tgl", - "ces", - "hun" - ] - }, - "text": "Lingala (tones) Bato nyɔ́nsɔ na mbótama bazalí nsɔ́mí mpé bakókání na limɛmya mpé makokí. 
Bazalí na mayɛ́lɛ mpé basengélí kovánda na bondeko o káti na bangó.", - "type": "NarrativeText" - }, - { - "element_id": "353adb6fb432616b715be3966a6d79bd", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "lit" - ] - }, - "text": "Lithuanian Visi žmonės gimsta laisvi ir lygūs savo orumu ir teisėmis. Jiems suteiktas protas ir sąžinė ir jie turi elgtis vienas kito atžvilgiu kaip broliai.", - "type": "NarrativeText" - }, - { - "element_id": "3e4f829a968d5f615b4245e85dc21d08", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "som" - ] - }, - "text": "Lobi Teehuu sʋnɔ n ther ɛɛ nɩɩ bʋnɔ wa n do deeaʔ sɩ wʋ n makha samɩnɩ na nà hʋ tɩnɛpar rà. Thangba ti yɛr à pɛ yɛr jɩɩr nà fɩlwɛ sɩ a teena waan fʋkha omkhaa.", - "type": "NarrativeText" - }, - { - "element_id": "7c7f50be4d274486c143858905c69e06", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa" - ] - }, - "text": "Lozi Batu kaufela ba pepilwe inge ba lukuluhile ni liswanelo ze swana. 
Ba ba ni swanelo ya ku nahana mi ba swanela ku ba ni likezo za buzwale ku mutu yo mung'wi.", - "type": "NarrativeText" - }, - { - "element_id": "6197bb27429c967b218d90ce1ccd2a0c", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa", - "ind" - ] - }, - "text": "Luba-Kasai Bantu bonsu badi baledibwa badikadile ne badi ne makokeshi amwe. Badi ne lungenyi lwa bumuntu ne kondo ka moyo, badi ne bwa kwenzelangana malu mu buwetu.", - "type": "NarrativeText" - }, - { - "element_id": "bc4bb086412d3334ab1dee422ea2cb3c", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa" - ] - }, - "text": "Lunda Muntu wejima wasemuka walukbuka wesekana hamu ni akwawu mukumulemesha. Wenkewa kutong'ojoka nikuzatila hamu nimukwawu muntu muwunta'a.", - "type": "NarrativeText" - }, - { - "element_id": "872908665791636f2ec3f0477922984f", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa" - ] - }, - "text": "Luvale Vatu vosena vasemuka yapwa hohamwe nakweseka mukuyoya chavo. 
Vatwama nachiyoyelo chalusesa chajingolo chakuzanga kulivwashana muchiyoyelo chavo.", - "type": "NarrativeText" - }, - { - "element_id": "3f8cca735e9bb8ee68adff123b7ebdda", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "nld", - "deu" - ] - }, - "text": "Luxembourgeois All Mënsch kënnt fräi a mat deer selwechter Dignitéit an dene selwechte Rechter op d'Welt. Jiddereen huet säi Verstand a säi Gewësse krut an soll an engem Geescht vu Bridderlechkeet denen anere géintiwwer handelen.", - "type": "NarrativeText" - }, - { - "element_id": "1a2cc3d892dc79a4b68cc59db7a69ea1", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "mkd" - ] - }, - "text": "Macedonian Сите човечки суштества се раѓаат слободни и еднакви по достоинство и права. Тие се обдарени со разум и совест и треба да се однесуваат еден кон друг во духот на општо човечката припадност.", - "type": "NarrativeText" - }, - { - "element_id": "ce6f9e17e88d78727c8e1483fb614015", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ind" - ] - }, - "text": "Madura Sadajana oreng lahir mardika e sarenge drajat klaban hak-hak se dha-padha. 
Sadajana eparenge akal sareng nurani ban kodu areng-sareng akanca kadi taretan.", - "type": "NarrativeText" - }, - { - "element_id": "2e4fdb7fcd2748cce07840226331c829", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "hin" - ] - }, - "text": "Magahi सब लोग आजादे जन्म लेब हई तथा सब के बराबरे सम्मान और अधिकार हइ। हुनखो के पास समझ-बूझ और अंत:करण के आवाज होब हई। और हुनका दोसरो के साथ भाईचारा के व्यवहार करे पड़ हई।", - "type": "UncategorizedText" - }, - { - "element_id": "d691df62a8af33ae0b9c152a092e32a9", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "hin", - "nep" - ] - }, - "text": "Maithili सभ मानव जन्मतः स्वतन्त्र अछि तथा गरिमा आʼ अधिकारमे समान अछि। सभकेँ अपन–अपन बुद्धि आʼ विवेक छैक आओर सभकेँ एक दोसराक प्रति सौहार्दपूर्ण व्यवहार करबाक चाही।", - "type": "UncategorizedText" - }, - { - "element_id": "d73cc566475e568433ff76c1fb6af485", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa", - "tgl", - "ind" - ] - }, - "text": "Makhuwa Atthu othene aniyaria oolikana ni owilamula moota ontthunaya okhala, variyari v’edignidade ni edireito. 
Akhalanne esaria ni otthokelela, ahaana akhalasaka othene saya vamurettele.", - "type": "NarrativeText" - }, - { - "element_id": "166af43c7950017574b550ca090a6ff8", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "est", - "hrv" - ] - }, - "text": "Makonde Vanu vohevohe vaidile n’chilambo valendene. Vanijaliwa ulimala vene. Pavele vanu pave na ulongo.", - "type": "NarrativeText" - }, - { - "element_id": "b672ca63908de1ff358d10ef96fd3d81", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ind", - "tgl", - "slk" - ] - }, - "text": "Malagasy, Plateau Teraka afaka sy mitovy zo sy fahamendrehana ny olombelona rehetra. Samy manan-tsaina sy fieritreretana ka tokony hifampitondra am- pirahalahiana.", - "type": "NarrativeText" - }, - { - "element_id": "e74053233c7584ace3ddb4357ac894b7", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ara", - "fas" - ] - }, - "text": "Malay (Arabic) سموا مأنسي دلاهيركن بيبس دان سامرات دري سڬي كموليأن دان حق٢. 
مريك ممڤوڽاي ڤميكيرن دان ڤراسأن هاتي دان هندقله برتيندق د انتارا ساتو سام لائن دڠن سماڠت ڤرساودارأن.", - "type": "NarrativeText" - }, - { - "element_id": "cec58af843b3ffbe84e80cc0ac35d856", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ind" - ] - }, - "text": "Malay (Latin) Semua manusia dilahirkan bebas dan samarata dari segi kemuliaan dan hak-hak. Mereka mempunyai pemikiran dan perasaan hati dan hendaklah bertindak di antara satu sama lain dengan semangat persaudaraan.", - "type": "NarrativeText" - }, - { - "element_id": "563cefb3266bb81ad240fb3d631fb5b0", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "mal" - ] - }, - "text": "Malayalam മനുഷ്യരെല്ലാവരും തുല്യാവകാശങ്ങളോടും അന്തസ്സോടും സ്വാതന്ത്ര്യത്തോടുംകൂടി ജനിച്ചിട്ടുള്ളവരാണ്‌. അന്യോന്യം ഭ്രാതൃഭാവത്തോടെ പെരുമാറുവാനാണ്‌ മനുഷ്യന്നു വിവേകബുദ്ധിയും മനസ്സാക്ഷിയും സിദ്ധമായിരിക്കുന്നത്‌.", - "type": "NarrativeText" - }, - { - "element_id": "a1c5471ea369ac3ba44f2829262f62aa", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "mal" - ] - }, - "text": "Malayalam മനുഷ്യരെല്ലാവരും തുല്യാവകാശങ്ങളോടും അന്തസ്സോടും സ്വാതന്ത്ര്യത്തോടുംകൂടി ജനിച്ചിട്ടുള്ളവരാണ്‌. 
അന്യോന്യം ഭ്രാതൃഭാവത്തോടെ പെരുമാറുവാനാണ്‌ മനുഷ്യന്നു വിവേകബുദ്ധിയും മനസ്സാക്ഷിയും സിദ്ധമായിരിക്കുന്നത്‌.", - "type": "NarrativeText" - }, - { - "element_id": "abe9340337f1806d7c7bb1e55e23819f", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ara" - ] - }, - "text": "Maldivian ހުރިހާ އިންސާނުންވެސް ދުނިޔެއަށް އުފަންވަނީ، މިނިވަންކަމުގައި، ހަމަހަމަ ޙައްޤުތަކަކާއެކު، ހަމަހަމަ ދަރަޖައެއްގައި ކަމޭހިތެވިގެންވާ ބައެއްގެ ގޮތުގައެވެ. ހެޔޮ ވިސްނުމާއި، ހެޔޮބުއްދީގެ ބާރު އެމީހުންނަށް ލިބިގެންވެއެވެ. އަދި އެކަކު އަނެކަކާމެދު އެމީހުން މުޢާމަލާތް ކުރަންވާނީ، އުޚުއްވަތްތެރިކަމުގެ ރޫޙެއްގައެވެ.", - "type": "NarrativeText" - }, - { - "element_id": "c3f212c4f2a219b94139b577bd336587", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "hrv", - "swa" - ] - }, - "text": "Maltese Il-bnedmin kollha jitwieldu ħielsa u ugwali fid-dinjità u d-drittijiet. 
Huma mogħnija bir-raġuni u bil-kuxjenza u għandhom iġibu ruħhom ma’ xulxin bi spirtu ta’ aħwa.", - "type": "NarrativeText" - }, - { - "element_id": "d19e3ea923ac9598f7ebe493963dcb57", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "som" - ] - }, - "text": "Mam, Northern Kyaqiilqe winaq nchi itz'aj tuj kopib'il, juunx kychuwiinqal b'ix kyokleen, kyja'tzan tuj tb'aanal xiinv'il tu'n kyanq'iin tuj b'ank'u'j kyxool.", - "type": "NarrativeText" - }, - { - "element_id": "53014d120e3ef288a2152a64e8cc5fae", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ind", - "tur", - "som" - ] - }, - "text": "Maninkakan, Eastern Adamadennu bɛɛ sɔdɔnɲa kakan, hɔrɔya dɔ, fabadenɲa dɔ ani sariya ta fan dɔ. Hankili ni sɔnɔmɛ ye alu bɛɛ ma, a kakan wo dɔ alu ye bakelenɲa sila lataaman alu ɲɔɔn tɛ.", - "type": "NarrativeText" - }, - { - "element_id": "61226d5c10b4c0396f2f95f2ca652df3", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] - }, - "text": "Manx Ta dy chooilley ghooinney ruggit seyr as corrym rish dy chooilley ghooinney elley ayns ooashley as ayns cairys. 
Ta resoon as cooinsheanse stowit orroo as lhisagh ad dellal rish y cheilley lesh spyrryd braaragh.", - "type": "NarrativeText" - }, - { - "element_id": "a937f2e976892410723177039216ec66", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa", - "tgl" - ] - }, - "text": "Maori Ko te katoa o nga tangata i te whanaungatanga mai e watea ana i nga here katoa; e tauriterite ana hoki nga mana me nga tika. E whakawhiwhia ana hoki ki a ratou te ngakau whai whakaaro me te hinengaro mohio ki te tika me te he, a e tika ana kia meinga te mahi a tetahi ki tetahi me ma roto atu i te wairua o te noho tahi, ano he teina he tuakana i ringa i te whakaaro kotahi.", - "type": "NarrativeText" - }, - { - "element_id": "dae3f973f6bbdd3401ce4aa3e297b361", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ind", - "dan", - "afr" - ] - }, - "text": "Mapudungun Kom pu mogence kisuzuam mvlekey, kom cegeygvn, logkogeygvn ka piwkegeygvn, nieygvn kimvn fey mew mvley tañi yamniewael ka epuñpvle kejuwael egvn.", - "type": "UncategorizedText" - }, - { - "element_id": "ecca335c6a309f063e4df0ad38eecd27", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "mar" - ] - }, - "text": "Marathi सर्व मानवी व्यक्ति जन्मतःच स्वतंत्र आहेत व त्यांना समान प्रतिष्ठा व समान अधिकार आहेत. त्यांना विचारशक्ति व सदसविद्वेकबुद्धि लाभलेली आहे. 
व त्यांनी एकमेकांशी बंधुत्याच्या भावनेने आचरण करावे.", - "type": "NarrativeText" - }, - { - "element_id": "01fca41684c1b7b968a512dfeec0139e", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "slv" - ] - }, - "text": "Marshallese Armij otemjej rej rujlok ilo anemkwoj im jonon utiej eo im maron ko air wot juon. Emwij lelok non ir maron in bukot non ir make im bareinwot boklikot kin men ko rej tomaki im bwe jerbal non dron ilo juon jitobon jimpenjatin.", - "type": "NarrativeText" - }, - { - "element_id": "3a69fb7fe5d36459edf30ffa8f0fb0bc", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "eng", - "cat" - ] - }, - "text": "Matsés Chidon tishaido yec matses abitedimbo bëdamboec isnanac bëdambo ictsiash. Chieshnanac icsambo ictsiash. Abitedimbo bëdamboec tabadac bëdambo ictsiash. 
Shubu abentsëcquidën tabadac birnboec abitedi tabadac bëdambo ictsiash - quequin chuipanëdash nidaid abitedinoësh cho-choquidon.", - "type": "NarrativeText" - }, - { - "element_id": "9c3467ac29002d9da69f15b063e13924", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "hun", - "ind", - "tur", - "som" - ] - }, - "text": "Maya, Yucatán Tuláakal wíinik ku síijil jáalkʼab yetel keet u tsiikul yetel Najmal Sijnalil, beytun xan naʼataʼan sijnalil yetel noʼojaʼanil u tuukuloʼ, kʼaʼabet u bisikuba bey láaktzilil yetel tuláakal u baatzileʼ.", - "type": "NarrativeText" - }, - { - "element_id": "7947c1a7d2c92cd1fea5311d4d9241ba", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "hrv", - "slv", - "sqi" - ] - }, - "text": "Mazahua Central Texe yo nte̱'e̱ chjetrjoji, angezeji ximi xo'oji ñeje k'inchiji, nesta ra ngara na jo'o k'o dyaja e nte̱'e̱.", - "type": "UncategorizedText" - }, - { - "element_id": "ded8e8298bf9edcaae477d35c01be283", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "sqi", - "slv" - ] - }, - "text": "Mazatec, Ixcatlán Nga ndindie xuta ngatsen de’e ko ngondsejen ngatjin-kjua nga xchandinkon nt’a ngondsejen ngatjin kokjin-tokon,kotjinkjua nga takie engajan skuendinkon xkjin.", - "type": "NarrativeText" - }, - { - "element_id": "150dfe46097e25b8aa601565b3487049", - "metadata": { - 
"data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa" - ] - }, - "text": "Mbundu O athu woso avwala abhuluka ni kusokela mu kijingu ni mu itekelu. Ene ala ni ulungilu ni kilunji ni atokala kulaya kumoxi nya akwa mu mixima ya undandu.", - "type": "NarrativeText" - }, - { - "element_id": "407b0080d05f944ba83f5c3e722bde13", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa" - ] - }, - "text": "Mbundu (009) Mutu uoso uoso a mu vuala ni ufolo ni kutena kumoxi mu kijingu ni mu ubinganu. Mu kilembu kia kubanga ni mu ubanzelu, Atena uê kubanga ioso kua akua mu muxima ua tululuka mba upange.", - "type": "NarrativeText" - }, - { - "element_id": "d76da3518499aeb0e43b4c133556d135", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa", - "ind" - ] - }, - "text": "Mende Numuvuisia Kpɛlɛɛ ta ti le tɛ yɛ nduwɔ ya hu, tao ti nuvuu yei kɛɛ ti lɔnyi maa hɛwungɔ. Kiiya kɛɛ hindaluahu gɔɔla a yɛlɔ ti hun. 
Fale mahoungɔ ti ti nyɔnyɔhu hoi kia ndeegaa.", - "type": "NarrativeText" - }, - { - "element_id": "ac3c7d9dea662f8ba1dfb383045ce903", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "est", - "sqi", - "hrv" - ] - }, - "text": "Micmac Msit mimajulnu’k weskwijinu’ltijik alsumsultijik aqq newte’ tett wkpimte’tmut aqq koqwajo’taqnn wejkul’aqmititl.", - "type": "NarrativeText" - }, - { - "element_id": "3bc1008b1e95383cab780d483a216d43", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ind", - "tgl" - ] - }, - "text": "Minangkabau Sadonyo manusia dilahiakan mardeka dan punyo martabat sarato hak-hak nan samo. Mareka dikaruniai aka jo hati nurani, supayo satu samo lain bagaul sarupo urang badunsanak.", - "type": "NarrativeText" - }, - { - "element_id": "208949d3fb140dd9413f78a99feda832", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ind" - ] - }, - "text": "Mískito Upla sut ba kulkanka lakara, airaitka nanira bara pri, sin, aikuki, baku takisa. 
Bamna sins laka bri baku, lukanka bain pri baku aimuihni lakara, pana pana tabaikan kaiasa.", - "type": "NarrativeText" - }, - { - "element_id": "db840a4da82f82310ee839cd22112f22", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "fin" - ] - }, - "text": "Mixe, Totontepec Tum akijpxa xa ve’e jayu kye’ex, ve’em ax jö’n tyukidaakjüva tijaty mëkin; ve’empa axjö’n jä jyööjtykin di yaknaxy, jats oy myujatyöö’tëjk di mëët nayjavajüt.", - "type": "NarrativeText" - }, - { - "element_id": "bbe9fa33187b976f4032c34c6ca2fabf", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "hrv", - "swa" - ] - }, - "text": "Mixtec, Metlatónoc Taka ma ñayi nguiakoi ñayivi ñatu na ja'a tnu'u ja kusa'a ndeva'ña-i, su'uva kajito va'aña-i, yuka ku ja jiniñu'u ja kukototna-i.", - "type": "NarrativeText" - }, - { - "element_id": "03b6cefe8d16c5c896f974b268a52302", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ind", - "tgl", - "swa" - ] - }, - "text": "Mizo Mi zawng zawng hi zalêna piang kan ni a, zahawmna leh dikna chanvoah intluk tlâng vek kan ni. 
Chhia leh tha hriatna fîm neia siam kan nih avangin kan mihring puite chungah inunauna thinlung kan pu tlat tur a ni.", - "type": "NarrativeText" - }, - { - "element_id": "ec6cdd4d644ddfaafbb05d9216ebbd7c", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ind", - "som", - "tgl" - ] - }, - "text": "Moba Nifoi kul maal yendu buam po i, k b yudand yen b yiko-nba biɛ ja. B mɔg maalm g ban yal g ŋan, g biɛ baa bu yen lieb naataann n ninŋ i.", - "type": "NarrativeText" - }, - { - "element_id": "0d21e19f00c8cb7264e83c01c0f02161", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain" - }, - "text": "Mon မၞိဟ်ဂမၠိုၚ် အိုဿီုတအ်ဝွံ စနူသၠးတိတ် နူဂဝ်ဂၞဴ ဒှ်မၞိဟ်သၠးပွးအိုတ်တုဲ အခေါၚ်အရာ ကေုာံ သိက္ခာမၞိဟ်တအ် တုပ် သၟဟ်ရ။ မၞိဟ်တအ်ဂှ် နွံကဵုဓရ်စၚ်ခြၚ်ကေုာံ သမ္တီညာဏ် ဓဝ်ပါ်ပဲါ ခိုဟ်ပရေံနွံတုဲ ညးမွဲ ကေုာံ ညးမွဲ ထေက်ကဵု သ္ဒးဒ္ဂေတ်ဗက် ဆက်ဆောံညးသ္ကအ် နစိုတ်ဓာတ်ကောံဒေံအရေ။", - "type": "UncategorizedText" - }, - { - "element_id": "a36553665277971db5d4c68908f99088", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "rus" - ] - }, - "text": "Mongolian, Halh (Cyrillic) Хүн бүр төрж мэндлэхэд эрх чөлөөтэй, адилхан нэр төртэй, ижил эрхтэй байдаг. 
Оюун ухаан, нандин чанар заяасан хүн гэгч өөр хоорондоо ахан дүүгийн үзэл санаагаар харьцах учиртай.", - "type": "NarrativeText" - }, - { - "element_id": "d68747fffbd22857ff75b3bfe7dc00c4", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] - }, - "text": "Mongolian, Halh (Mongolian)", - "type": "Title" - }, - { - "element_id": "ffd087e56c47b9405e77d2f08dca7d1e", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain" - }, - "text": "ᠬᠦᠮᠦᠨ ᠪᠦᠷ ᠲᠥᠷᠥᠵᠦ ᠮᠡᠨᠳᠡᠯᠡᠬᠦ ᠡᠷᠬᠡ ᠴᠢᠯᠥᠭᠡ ᠲᠡᠢ᠂ ᠠᠳᠠᠯᠢᠬᠠᠨ ᠨᠡᠷ᠎ᠡ ᠲᠥᠷᠥ ᠲᠡᠢ᠂ ᠢᠵᠢᠯ ᠡᠷᠬᠡ ᠲᠡᠢ ᠪᠠᠢᠠᠭ᠃ ᠣᠶᠤᠨ ᠤᠬᠠᠭᠠᠨ᠂ ᠨᠠᠨᠳᠢᠨ ᠴᠢᠨᠠᠷ ᠵᠠᠶᠠᠭᠠᠰᠠᠨ ᠬᠦᠮᠦᠨ ᠬᠡᠭᠴᠢ ᠥᠭᠡᠷ᠎ᠡ ᠬᠣᠭᠣᠷᠣᠨᠳᠣ᠎ᠨ ᠠᠬᠠᠨ ᠳᠡᠭᠦᠦ ᠢᠨ ᠦᠵᠢᠯ ᠰᠠᠨᠠᠭᠠ ᠥᠠᠷ ᠬᠠᠷᠢᠴᠠᠬᠥ ᠤᠴᠢᠷ ᠲᠠᠢ᠃", - "type": "UncategorizedText" - }, - { - "element_id": "3d0a59b543e077c2f0c391add9b38a89", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "hrv" - ] - }, - "text": "Montenegrin Sva ljudska bića rađaju se slobodna i jednaka u dostojanstvu i pravima. 
Ona su obdarena razumom i savješću i jedni prema drugima treba da postupaju u duhu bratstva.", - "type": "NarrativeText" - }, - { - "element_id": "86eff2400c116e5d00b9f1b3e17e0d7f", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "som", - "ind", - "cym" - ] - }, - "text": "Mòoré Ninsaalbã fãa sã n doge, ned fãa so a menga, ned pa rogd n yaa yamb ye, nebã fãa zema taab b yel-segdɩ la b burkĩndlem wɛɛngẽ. Nebã fãa tara yam la tagsgo, ned fãa togame n vɩɩnd ne a to saam-biir pʊgẽ.", - "type": "NarrativeText" - }, - { - "element_id": "91eb2842523b8e930ee6199a0098fa14", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "hrv" - ] - }, - "text": "Moro Leđa pređ lalǝŋǝnia lëbǝrëinialo na lǝɽǝwaṯo eŋen ŋǝđamia na eŋen pređ iŋi ŋǝrcađaṯo ṯa leđa alǝfiđi. 
Lënŋulu pređ lananëinu đǝnaca đame ɽetǝɽeto na ara gǝŋǝra ŋenŋanṯa alǝɽǝwađaṯe alamǝđaiđe bǝɽan usilaga gǝŋǝlǝŋǝnia na gǝŋorba.", - "type": "NarrativeText" - }, - { - "element_id": "0e458a0b7d5fb50416d274c11e747017", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "spa", - "ita" - ] - }, - "text": "Mozarabic Totos les esseres humanos nascent libberos et eguales in dignitate e dretos e, dotatos commo stant de racione e conscientia, devent comportarse in germanitate les unos con les altros.", - "type": "NarrativeText" - }, - { - "element_id": "25ab4cdce4c3199b55a4bd49864e981b", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ind", - "est" - ] - }, - "text": "Naga, Ao Meimchir ajak temeten aser tashi kasa nüji nung asor. 
Parnok dak bilemtettsü shisatsü aser tangatetba kasa agüja aliba jagi külem adianu rongnung tanela ka nung lungjema alitsüla.", - "type": "NarrativeText" - }, - { - "element_id": "ae7016d3a16e6fef244158363a24ac9e", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "hrv", - "ind" - ] - }, - "text": "Nahuatl, Central Nochi tlakamej uan siuamej kipiaj manoj kuali tlakatisej, nochi san se totlatechpouiltilis uan titlatepanitalojkej, yeka moneki kuali ma timouikakaj, ma timoiknelikaj, ma timotlasojtlakaj uan ma timotlepanitakaj.", - "type": "NarrativeText" - }, - { - "element_id": "9376ea8b7100165bb8bd466c00f5bdcc", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "rus" - ] - }, - "text": "Nanai Хэмту найсал гипалин, мэнэ гэбудиэри, правосалдиари эмуту балдичи. Нёанчи муруӈку, дэрэлку, диа диавари а-нэу-мэт бодомари тагилайчи.", - "type": "NarrativeText" - }, - { - "element_id": "201308d749f47555d03c5087f304457b", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "som", - "ces" - ] - }, - "text": "Navajo Bilaʼashdaʼii tʼáá ałtsoh yiníkʼehgo bidizhchįh dóó aheełtʼeego ílį́į́go bee baahóchįʼ. 
Eíí háníʼ dóó hánítshakees hwiihdaasyaʼ eíí binahjį́ʼ ahidiníłnáhgo álíleekʼehgo kʼé bee ahił niidlį́.", - "type": "NarrativeText" - }, - { - "element_id": "7c1696d6b3e99d3ac8481060f74fe8d7", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "tgl", - "ind" - ] - }, - "text": "Ndebele Abantu bonke bazalwa bekhululekile njalo belingana kumalungelo abo. Balesipho sikanembeza, ngakho bamele baphathane ngomoya otshengisa ubuhlobo lobunye.", - "type": "NarrativeText" - }, - { - "element_id": "d63b3107bf325e2c695213bc9dd8742e", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa", - "tgl" - ] - }, - "text": "Ndonga Aantu ayehe oya valwa ye na emanguluko noye na ondilo yi thike pamwe osho wo uuthemba. 
Oye na omaipulo goondunge neiuvo onkene naa kalathane mombepo yuumwainathana.", - "type": "NarrativeText" - }, - { - "element_id": "a0cad811bb49185b6fdb66fb2060c59a", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "rus", - "bul" - ] - }, - "text": "Nenets Ет хибяри ненэць соямарианта хуркари правада тнява, ӈобой ненэця ниду нись токалба, ӈыбтамба илевату тара.", - "type": "NarrativeText" - }, - { - "element_id": "80851f8727cbd5baeb6611ada10ff1f9", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "nep" - ] - }, - "text": "Nepali सबै व्यक्ति हरू जन्मजात स्वतन्त्र हुन ती सबैको समान अधिकार र महत्व छ। निजहरूमा विचार शक्ति र सद्धिचार भएकोले निजहरूले आपसमा भातृत्वको भावना बाट व्यवहार गर्नु पर्छ।", - "type": "UncategorizedText" - }, - { - "element_id": "23ce504c8239c6964f02399ff1fcb1bf", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "rus" - ] - }, - "text": "Nganasan Бәнде” ӈанасанә” ӈәтукәнды” нендя”туо” ӈонә хонсы хелиде” ӈиле мәнәй (правай). 
Сытыӈ хонды” ӈиле ӈонда ӈонә сяру, дүзытәндыӈ ихүтүӈ нягәә” сүөарусә”.", - "type": "NarrativeText" - }, - { - "element_id": "14d1c1d94f755feee4c5765fa51fb448", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa", - "ind" - ] - }, - "text": "Niue Ko e tau tagata momoui oti kua fanau ai ke he fakatokanoaaga mo e fakatatai oti e tau tutuaga mo e tau tonuhia. Kua moua ai foki e lautolu e kakano mo e manamanatuaga ti kua lata ni ke fakafetui e taha ke he taha ke he agaga fakamatakainaga.", - "type": "NarrativeText" - }, - { - "element_id": "9164d07351a9366edfae5357e2ab807c", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "tgl", - "ind" - ] - }, - "text": "Nomatsiguenga Antagaisati matsiguenga ibogaiguë matsiguengasonorl. Aisati icantaigaca. Teni iromerataiguengani. Antagaisati iquengaigui aisati igóiguiro ora caninaro aisati igóiguiro ora te onganinate. Iroro caninataque omagaro matsiguenga iraniacaninataigueri ira basiniati matsiguenga aisati ingantaiguerí ora caninaro.", - "type": "NarrativeText" - }, - { - "element_id": "a2d52f93737464a25abcd5d12c771b98", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "nor" - ] - }, - "text": "Norwegian, Bokmål Alle mennesker er født frie og med samme menneskeverd og menneskerettigheter. 
De er utstyrt med fornuft og samvittighet og bør handle mot hverandre i brorskapets ånd.", - "type": "NarrativeText" - }, - { - "element_id": "0de9dab37169c4ded9b7f75bedf80c7f", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "nor" - ] - }, - "text": "Norwegian, Nynorsk Alle menneske er fødde til fridom og med same menneskeverd og menneskerettar. Dei har fått fornuft og samvit og skal leve med kvarandre som brør.", - "type": "NarrativeText" - }, - { - "element_id": "00ebc1efcc4358c32327bc6327f0a581", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] - }, - "text": "Nuosu", - "type": "Title" - }, - { - "element_id": "dcfcf466590e9daa75e86df759c90a23", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "zho" - ] - }, - "text": "ꊿꂷꃅꄿꐨꐥ,ꌅꅍꀂꏽꐯꒈꃅꐥꌐ。ꊿꊇꉪꍆꌋꆀꁨꉌꑌꐥ,ꄷꀋꁨꂛꊨꅫꃀꃅꐥꄡꑟ。", - "type": "Title" - }, - { - "element_id": "68861af146d56db218a932271da013ea", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa" - ] - }, - "text": "Nyamwezi Banhu bose bubyalagwa biyagalulile, n’ikujo haki zilenganelile.", - "type": "NarrativeText" - }, - { - "element_id": 
"2b70b2e37cc28ecb50a65c1977764c27", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa" - ] - }, - "text": "Nyanja (Chechewa) Anthu onse amabadwa aufulu ndiponso ofanana mu ulemu ndi ufulu wao. Iwowa ndi wodalitsidwa ndi mphamvu zoganiza ndi chikumbumtima ndipo achitirane wina ndi mnzake mwaubale.", - "type": "NarrativeText" - }, - { - "element_id": "fd152d98beaa7e2f825aac9b4d031412", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa" - ] - }, - "text": "Nyanja (Chinyanja) Anthu onse amabadwa mwa ufulu ndiponso olinganga m' makhalidwe ao. 
Iwo amakhala ndi nzeru za cibadwidwe kotero ayenera kucitirana zabwino wina ndi mnzace.", - "type": "NarrativeText" - }, - { - "element_id": "a47d589be50e40faa0306403da28d30d", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa", - "ind" - ] - }, - "text": "Nyankore Abantu nibazaarwa baine obugabe nobushoborozi ebiri kwingana nibahangwa baine obwengye kandi barikubasa kwahura ekirungi nekibi, nahabwekyo abantu bashemereire kutuura kumwe nkabanya Uganda.", - "type": "UncategorizedText" - }, - { - "element_id": "ba8f5d6e4dd82ab64f5d456db0bb5fe6", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa" - ] - }, - "text": "Nyemba Vanu voxe vakasemuka mu cizango co mumo lika mu vulemu co kulimanena. Vakevo vakala na mangana co na mbunge co vana pande kulinga vamo na vakwavo na mbunge ya vuna yina.", - "type": "NarrativeText" - }, - { - "element_id": "8bb5a449ca76c9652411df83a16d36a5", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "tur", - "swa", - "afr", - "som" - ] - }, - "text": "Nzema Menli muala di bɛ ti anwo na eza noko bɛsɛ wɔ dibilɛ nee adenlenyianlɛ nu. 
Bɛlɛ ndwenlenwo nee adwenle, yemɔti ɔwɔ kɛ bɛkile adiemayɛlɛ bɛmaa bɛ nwo ngoko.", - "type": "NarrativeText" - }, - { - "element_id": "945f5e12a8c939707776f2152604ea76", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "fra", - "ita" - ] - }, - "text": "Occitan Tóuti lis uman naisson libre. Soun egau pèrla digneta e li dre. An tóuti uno resoun e uno counsciènci. Se dèvon tenifreirenau lis un 'mé lis autre.", - "type": "NarrativeText" - }, - { - "element_id": "de85ed5a407a19c2c1c89211693d8861", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "fra" - ] - }, - "text": "Occitan (Auvergnat) Ta la proussouna neisson lieura moé parira pà dïnessà mai dret. Son charjada de razou moé de cousiensà mai lhu fau arjî entremeî lha bei n'eime de freiressà.", - "type": "NarrativeText" - }, - { - "element_id": "6260219bc4a42037e7d6f0418b7284c5", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ita" - ] - }, - "text": "Occitan (Francoprovençal, Fribourg) Totè lè dzin vinyon ou mondo libro è parê in dinyitâ è in drê. 
Chon dotâ dè réjon è dè konhyinthe è dêvon chè konportâ lè j’on-lè j’ôtro din on èchpri dè fratèrnitâ.", - "type": "NarrativeText" - }, - { - "element_id": "b47382b7a0e0afd209aa7e1993565391", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ita", - "cat" - ] - }, - "text": "Occitan (Francoprovençal, Savoie) Tu luz òmò vinyon u mondo, librò, tu tòton pè leû dinyitò è leû drèye. Y’on tu d’émò è dè konhyinhi è i dèvon fè- mouhò dè fratèrnitò aouèy luz òtri.", - "type": "NarrativeText" - }, - { - "element_id": "da6df9434bcea33fdb84c07309f23605", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "fra", - "ita" - ] - }, - "text": "Occitan (Francoprovençal, Valais) Tui lè jêtre humain néchon libro è pary in degnetâ é in drouê. Chon reijonâbló è dè counchieince è deivouon âzic lè j’oun vi j’avi di j’âtró in pèr oun espri dè fratèrnitâ", - "type": "NarrativeText" - }, - { - "element_id": "4be88083cf737cac6ec1b39afb2513c5", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "fra" - ] - }, - "text": "Occitan (Francoprovençal, Vaud) Tî lè z’ître humain vîgnant âo mondo libro et parâi dein la dignitâ et lè drâi. 
L’ant reçu réson et concheince et dâivant vivre lè z’on avoué lè z’autro quemet se sant frâre et chèra.", - "type": "NarrativeText" - }, - { - "element_id": "ca97829bba2e332be352861c0d0e0c70", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "cat", - "fra", - "spa" - ] - }, - "text": "Occitan (Languedocien) Totes los èssers umans naisson liures e egals en dignitat e en dreches. Son dotats de rason e de consciéncia e se devon comportar los unes amb los autres dins un esperit de fraternitat.", - "type": "NarrativeText" - }, - { - "element_id": "2c541386adb644071a67fa19c80d221f", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain" - }, - "text": "Ojibwa, Northwestern ᑭᑲᓇᐌᓀᓐ ᑲᐱᒪᑎᓯᐗᑦ ᓂᑕᐎᑭᐗᒃ ᑎᐯᓂᒥᑎᓱᐎᓂᒃ ᒥᓇ ᑕᐱᑕ ᑭᒋᐃᓀᑕᑯᓯᐎᓐ ᑲᔦ ᑌᐸᑫᑕᑯᓯᐎᓐ. 
ᐅᑕᔦᓇᐗ ᒥᑲᐎᐎᓐ ᑲᔦ ᓂᑄᑲᐎᓐ ᒥᓇᐗ ᑕᔥ ᒋᐃᔑᑲᓇᐗᐸᑎᐗᐸᓐ ᐊᒐᑯ ᒥᓄᐎᒋᐎᑎᐎᓂᒃ.", - "type": "NarrativeText" - }, - { - "element_id": "1770d7b5d51d295d22599366d8285ccc", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "est", - "tgl", - "hun", - "afr" - ] - }, - "text": "Okiek Piik togol kosigotiik en katiagetapkei koguyet ak imandanyuwan koyuyosin togol kogigigochi ngomnotet ak koperuret en iyon konyolu koyochigei oteptop tupchondit.", - "type": "UncategorizedText" - }, - { - "element_id": "838854e8c37bc2424bd4b8b4324da0a4", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "rus" - ] - }, - "text": "Orok Чипа̄ли гурунне̄ балӡичи гэвумэ, омотто мэ̄нэ мөрөнӡи, мэ̄нэ доронӡи. Но̄чи идэлу, иркалу, мэ̄нэ мэ̄нӡи на̄дактаӈачи бјӣчи.", - "type": "NarrativeText" - }, - { - "element_id": "58f4dadcdcd7410be6d204f2287e31c4", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "som" - ] - }, - "text": "Oromo, Borana-Arsi-Guji Namooti hundinuu birmaduu ta'anii mirgaa fi ulfinaanis wal-qixxee ta'anii dhalatan. 
Sammuu fi qalbii ittiin yaadan waan uumamaan kennameef, hafuura obbolummaatiin walii-wajjin jiraachuu qabu.", - "type": "NarrativeText" - }, - { - "element_id": "94e7fb62cfa3b7bce4161724caed0203", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ind" - ] - }, - "text": "Oroqen Beyel bambur zhiyu bishi, zhunyan-du bineken chuanli-du bambur pingdeng bishi. Nugartin lishing bineken liangshin bishi, akin nekun guanshi-ngi chingshen-du-in duidai-meet-ki-tin.", - "type": "NarrativeText" - }, - { - "element_id": "61b9c386f4d7f982e217e8a0973deae9", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "rus" - ] - }, - "text": "Osetin Адӕймӕгтӕ се 'ппӕт дӕр райгуырынц сӕрибарӕй ӕмӕ ӕмхуызонӕй сӕ барты. 
Уыдон ӕххӕст сты зонд ӕмӕ намысӕй, ӕмӕ кӕрӕдзийӕн хъуамӕ уой ӕфсымӕрты хуызӕн.", - "type": "NarrativeText" - }, - { - "element_id": "f829c47775b5845587447d35b6b41e40", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "hrv", - "swa", - "cym" - ] - }, - "text": "Otomi, Mezquital Gotho nu kja'ni i mu̱i ra zoo i gotho ro kuchti, i tu'ni nu ro ña padä bini i da budi, da mu̱i ra zoo koyu gotho yu kja'ni i yo kuadi.", - "type": "NarrativeText" - }, - { - "element_id": "a397f19eefe134a148362c0ba710828f", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "slv", - "est", - "ind", - "tgl" - ] - }, - "text": "Otuho lsiuni aati dang iko ahodc hade ihaniere erre boo ve isi orrijori dang to nelotulo. Owoni isi iko negigilita bwo ve iko ataja. Ongida isi ihanie awatek hosi ihwo elarak.", - "type": "NarrativeText" - }, - { - "element_id": "dd2ab495e062b9a11fe24355a3c1319e", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa" - ] - }, - "text": "Páez Ya'nwe'wewa'te' maa nasapa ha'dacehk hi'pku up'hi', wëtte u'huwa'hi'pta', eena' eena' f'i'zewa' hi'pta', üus hi'pta' d'ik'the hi'pta' naapa'kate. 
Sa' h'ukaysa üus hi'pcehktha'w sa' pyakhna'we f'i'ze hi'ptha'w.", - "type": "NarrativeText" - }, - { - "element_id": "b4f294da67df35573403a536b2853dd4", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ron", - "cat", - "ita", - "est" - ] - }, - "text": "Palauan A rogui 'l chad el mechell a ngarngii a ilmokl er tir ra diosisiu el llemalt. Ngarngii er tir a uldesuir mete mo meruul el mo rar bebil lokiu a ungil 'l omeruul ra klauchad.", - "type": "NarrativeText" - }, - { - "element_id": "ffd211be4b0b2eabd1a1792cc2b8afa1", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "tgl" - ] - }, - "text": "Pampangan Ding sablang tau mibait lang malaya at pante-pante king karangalan at karapatan. 
Ila mipagkaluban lang katuliran at konsensiya ay dapat misaupan king diwang pamikapatiran.", - "type": "NarrativeText" - }, - { - "element_id": "068d755c0e132506c2d31786a7ed4b32", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "pan" - ] - }, - "text": "Panjabi, Eastern ਸਾਰਾ ਮਨੁੱਖੀ ਪਰਿਵਾਰ ਆਪਣੀ ਮਹਿਮਾ, ਸ਼ਾਨ ਅਤੇ ਹੱਕਾਂ ਦੇ ਪੱਖੋਂ ਜਨਮ ਤੋਂ ਹੀ ਆਜ਼ਾਦ ਹੈ ਅਤੇ ਸੁਤੇ ਸਿੱਧ ਸਾਰੇ ਲੋਕ ਬਰਾਬਰ ਹਨ । ਉਨ੍ਹਾਂ ਸਭਨਾ ਨੂੰ ਤਰਕ ਅਤੇ ਜ਼ਮੀਰ ਦੀ ਸੌਗਾਤ ਮਿਲੀ ਹੋਈ ਹੈ ਅਤੇ ਉਨ੍ਹਾਂ ਨੂੰ ਭਰਾਤਰੀਭਾਵ ਦੀ ਭਾਵਨਾ ਰਖਦਿਆਂ ਆਪਸ ਵਿਚ ਵਿਚਰਣਾ ਚਾਹੀਦਾ ਹੈ ।", - "type": "UncategorizedText" - }, - { - "element_id": "e81229801afdd767a6ca59c9877783bc", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "urd" - ] - }, - "text": "Panjabi, Western سارے انسان آزاد تے حقوق تے عزت دے لحاظ نال برابر پیدا ہوندے نیں ۔ ۔ اوہ عقل سمجھ تے چنگے مندے دی پچھان تے احساس رکھدے نے ایس واسطے اوہناں نوں اک دوجے نال بھائی چارے والا سلوک کرنا چاہی دا اے ۔ ۔", - "type": "UncategorizedText" - }, - { - "element_id": "4f3aebd4854cd6b0308eca4661657a32", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "spa" - ] - }, - "text": "Papiamentu Tur ser humano ta nace liber y igual den dignidad y den derecho. 
Nan ta dota cu rason y cu consenshi y nan mester comporta nan den spirito di fraternidad pa cu otro.", - "type": "NarrativeText" - }, - { - "element_id": "a2c1dda9330915ecdfba4af7c21da5c0", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "fas" - ] - }, - "text": "Pashto, Northern د بشر ټول افراد ازاد نړۍ ته راځي او د حيثيت او د حقوقو له پلوه سره برابر دي۔ ټول د عقل او وجدان خاوندان دي او بايد يو له بل سره د ورورۍ په روحيه سره چلنند کړي۔", - "type": "UncategorizedText" - }, - { - "element_id": "7e9ad6a402b6252e85be01ffafa1eb5e", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "fra" - ] - }, - "text": "Picard Tos lès-omes vinèt å monde lîbes èt égåls po çou qu'èst d' leû dignité èt d' leûs dreûts. 
Leû re̊zon èt leû consyince elzî fe̊t on d'vwér di s'kidûre inte di zèle come dès frès", - "type": "NarrativeText" - }, - { - "element_id": "323d61680164a581d21b57bdcbf0d26e", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "eng", - "afr" - ] - }, - "text": "Pidgin, Nigerian Everi human being, naim dem born free and dem de equal for dignity and di rights wey we get, as human beings, God come give us beta sense wey we de take tink well, well and beta mind, sake for dis, we must to treat each other like broda and sister.", - "type": "NarrativeText" - }, - { - "element_id": "ac876cc3346916765112fe76163aaf80", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "nor" - ] - }, - "text": "Pijin Evri man en mere olketa born frii en ikwol lo digniti en raits blo olketa. Olketa evriwan olketa garem maeni fo tingting en olketa sapos fo treatim isada wittim spirit blo bradahood.", - "type": "NarrativeText" - }, - { - "element_id": "47fc36adefb94fbc8ce77cce0ef3cd95", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ind", - "swa" - ] - }, - "text": "Pintupi-Luritja Nganana maru tjuta, tjulkura tjuta, manta yurungka parrari nyinapayi tjutanya liipulala nyinanyi, nganana yanangu maru tjuta wiya kuyakuya. Yuwankarrangkuya palya nintingku kulini. 
Tjanaya palya kutjupa tjutaku tjukarurru nyinanytjaku, walytja tjuta nguwanpa, mingarrtjuwiya. Tjungungku palyangku kurrunpa kutjungku.Wangka ngaangku nganananya tjakultjunanyi rapa ngaranytjaku kutjupa tjuta nguwanpa.", - "type": "NarrativeText" - }, - { - "element_id": "b3fdb7f8753265489381d2f45c0d5b0a", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "tgl", - "ind" - ] - }, - "text": "Pipil Muchi ne tay gen tu weyga nestiwit tamagixti genga tik ekneliat wan ipal wan gichiwtiwit ipal ma munegigan ne se pal ne se.", - "type": "UncategorizedText" - }, - { - "element_id": "ec8e1439bfe9914fdac211b8f24455dd", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ind" - ] - }, - "text": "Pohnpeian Tohn sampa karos ipwiwei nan saledek oh duwepenehte nan arail wasa oh arail pwung. Arail marain oh pehm ih utakerail kahrehda korusie konehng sawaspene nin duwen pirien ehu.", - "type": "NarrativeText" - }, - { - "element_id": "cad1fbc2c59a2ab610912476278d0204", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "pol" - ] - }, - "text": "Polish Wszyscy ludzie rodzą się wolni i równi pod względem swej godności i swych praw. 
Są oni obdarzeni rozumem i sumieniem i powinni postępować wobec innych w duchu braterstwa.", - "type": "NarrativeText" - }, - { - "element_id": "07022bc1c3bb5010208399375dc1b813", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "por" - ] - }, - "text": "Portuguese (Brazil) Todos os seres humanos nascem livres e iguais em dignidade e direitos. São dotados de razão e consciência e devem agir em relação uns aos outros com espírito de fraternidade.", - "type": "NarrativeText" - }, - { - "element_id": "7925a3ec12f3766bebb236e3ec5bdc60", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "por" - ] - }, - "text": "Portuguese (Portugal) Todos os seres humanos nascem livres e iguais em dignidade e em direitos. Dotados de razão e de consciência, devem agir uns para com os outros em espírito de fraternidade.", - "type": "NarrativeText" - }, - { - "element_id": "4db9c27acd6b5a924d0ac2dde81f03ac", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] - }, - "text": "Pular NEDDHANKE EN FOW DYIBINTE NO HETTII NO FOTA E DHI FOW, E NDIMU E HANDANDHI. 
BHE DYIBINDINTE E HAGGHIL E FAAMU ; HIBHE HAANI DYOGONDIRDE E NDER HAGGHIL NEENEGOOTAANKAAKU.", - "type": "NarrativeText" - }, - { - "element_id": "dc4348bae7eccbd8e30af1763958fee9", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ara" - ] - }, - "text": "Pular (Adlam) 𞤋𞤲𞥆𞤢𞤥𞤢 𞤢𞥄𞤣𞤫𞥅𞤶𞤭 𞤬𞤮𞤬 𞤨𞤮𞤼𞤭، 𞤲'𞤣𞤭𞤥𞤯𞤭𞤣𞤭 𞤫 𞤶𞤭𞤦𞤭𞤲𞤢𞤲𞥆𞤣𞤫 𞤼𞤮 𞤦𞤢𞤲𞥆𞤺𞤫 𞤸𞤢𞤳𞥆𞤫𞥅𞤶𞤭. 𞤉𞤩𞤫 𞤲'𞤺𞤮𞥅𞤣𞤭 𞤥𞤭𞥅𞤶𞤮 𞤫 𞤸𞤢𞤳𞥆𞤭𞤤𞤢𞤲𞤼𞤢𞥄𞤺𞤢𞤤 𞤫𞤼𞤫 𞤫𞤩𞤫 𞤨𞤮𞤼𞤭 𞤸𞤵𞥅𞤬𞤮 𞤲'𞤣𞤭𞤪𞤣𞤫 𞤫 𞤲'𞤣𞤫𞤪 𞤩 𞤭𞤴𞤲𞤺𞤵𞤴𞤵𞤥𞥆𞤢𞥄𞤺𞤵.", - "type": "NarrativeText" - }, - { - "element_id": "9c7d0e713be2017eba040780765856df", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "est", - "swa" - ] - }, - "text": "Purepecha Iamendu k'uiripuecha janguarhiparini ka majku jarhati ka jurhimbekuecha jingoni kueraaŋasondikso ka, juajtakuarhisïndiksï ambakiti eratsekua ka kaxumbikua, jatsistiksï eskaksï sesi arhijperaaka.", - "type": "NarrativeText" - }, - { - "element_id": "9478aa88ff2306d5f2acc6b92e055546", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ind", - "swa", - "som", - "cat", - "est" - ] - }, - "text": "Q'eqchi' Chijunil li poyanam juntaq'eet wankil xloq'al naq nake'yo'la, ut kama' ak reheb' naq wan xna'leb'eb ut nake'reek'a rib', tento naq te'xk'am rib' sa' usilal chirib'ilrib'eb'.", - "type": "UncategorizedText" - }, - { - "element_id": "e7cb3a61bb828a46ce008b4251df5ef3", - "metadata": { - "data_source": { 
- "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "tgl", - "swa" - ] - }, - "text": "Quechua, Ambo-Pasco Lapan runa kay pachach'u yurin libri kawananpaq, lapanchinuy iwal respetasha kananpaqmi, mana pipis jarupänanpaq, lapanpis iwal yarpach'akuy yach'aqmi, alita mana alita tantiyar kawananpaq. Chaynuy runa masinwan juknin jukninwan kuyanakur kapäkuchun", - "type": "NarrativeText" - }, - { - "element_id": "7af8d8dd7e7418eed6057bb221448506", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "tgl", - "ind", - "swa", - "som" - ] - }, - "text": "Quechua, Arequipa-La Unión Kanmi derechonchiskuna llapanchispa, nacesqanchismanta. Kantaqmi llapanchispa runa kayninchis. Manan runa kanchu manay derechoyoq. Huk runaq derecho hukpawan kaqllan kan. Kanmi derechonchis llapanchispa allin kawsay libre tiyananchispaq. Llapan runaqpan kan yuyayninchis yachanapaq. Llapanchis kasun llapa runa masinchiskunawan munanakunapaq, huk ayllu hina.", - "type": "NarrativeText" - }, - { - "element_id": "d4c2a1e138b9d930e777cdaf26a52733", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa" - ] - }, - "text": "Quechua, Ayacucho Lliw runakunam nacesqanchikmantapacha libre kanchik, lliw derechonchikpipas iguallataqmi kanchik. 
Yuyayniyoq kasqanchikraykum hawkalla aylluntin hina kawsayta debenchik llapa runakunawan.", - "type": "NarrativeText" - }, - { - "element_id": "f4bb6dc9e8533755a35983f62fd63e34", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa" - ] - }, - "text": "Quechua, Cajamarca Yumbay ollqokuna, warmikuna pullalla kashun leyninchiqkunawan. Manam ni pipapis kriyadunchu kanchiqllapa. Suqninchiq, suqninchiq atinchiqllapa yuyayta \"imam alli, imam mana allichu\" nishpa. Chayshina kaptin, shumaqta tiyashunllapa suq ayllushinalla.", - "type": "NarrativeText" - }, - { - "element_id": "9cd272d47101a5545b07fa98899d9f70", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa", - "tgl", - "som" - ] - }, - "text": "Quechua, Cusco Llapa runan kay pachapi paqarin qispisqa, \"libre\" flisqa, allin kausaypi, chaninchasqa kausaypi kananpaq, yuyayniyoq, yachayniyoq runa kasqanman jina. Llapa runamasinwantaqmi wauqentin jina munanakunan.", - "type": "NarrativeText" - }, - { - "element_id": "7838a28da590ff7bb2ea5c7a48ba93fc", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa", - "som", - "tgl" - ] - }, - "text": "Quechua, Huamalíes-Dos de Mayo Huánuco Lapan runakunapis yurikuyan librimi y wakinkaqkunanaw rispitashqa, mana jarukushqa kayänanpaq. 
Saynawmi runakunaqa yuriyan shumaq yarpayyuq, alitapis mana alitapis reqiykar y seqay kuyapäkuyyuq. Saymi runakuna ali kawakuyänan jukninwan jukninwanpis.", - "type": "NarrativeText" - }, - { - "element_id": "08720fc9c770f44e38435bc27b49867d", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ind", - "swa" - ] - }, - "text": "Quechua, Huaylas Ancash Meyqan nunapis manam pipa sirweqnin nuna kananpaqtsu yurikushqa. I nuna karninmi meyqan nunapis juk láyatsu kayanman derëchunkunachowpis. I yarpachakiyta yacharninmi i allita mana allita shonqonkunachow mákurninmi nunakuna jukninta wiyanakur kayanman.", - "type": "NarrativeText" - }, - { - "element_id": "34a8df5528e399552e033b89176957b0", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ind", - "tgl", - "fin", - "est" - ] - }, - "text": "Quechua, Margos-Yarowilca-Lauricocha Lapantsikunapis Iibrimi yurishqantsi. Bälintsimi y derëchuntsikunapis wakinkaqkunanoqlapami. Yarpaynintsikunapis kaykanmi runa mayintsikunawan juk wawqinoq kuyanakur kawapäkunantsipaq.", - "type": "NarrativeText" - }, - { - "element_id": "68b63eb96c576d943261ea39555162be", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ind", - "tgl", - "swa" - ] - }, - "text": "Quechua, Northern Conchucos Ancash Mayqan runapis manam pipa isklabun kananpaqtsu yurishqa. 
Y runa karninmi llapan runakuna iwal kayanman dirichunkunachawpis. Y yarpayta yacharninmi y allita mana allita shunqunkunachaw makurninmi runakuna huknin hukninta rispitanakur kayanman.", - "type": "NarrativeText" - }, - { - "element_id": "ecc5d074ce9be67e187d19b4aabf87c5", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "tgl", - "ind", - "swa" - ] - }, - "text": "Quechua, North Junín Lapan runas kay pachachru nasimun juk rantisha runanuy mana pitas sirbinanpaqmi, alipa rikasha kananpaqmi, washasha kananpaqmi. Lapan runakunas nasipaakamun yarpayniyoqmi naatan tantiyayniyoqmi ima lutanta rurapaakurursi tantiyakunanpaq. Lapan runakunas kawapaakunaman juk wawqenuylam.", - "type": "NarrativeText" - }, - { - "element_id": "af8bad9d59da3dc7cc6e613e743d2e7f", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ind", - "swa", - "tgl", - "som" - ] - }, - "text": "Quechua, South Bolivian Tukuy kay pachaman paqarimujkuna libres nasekuntu tukuypunitaj kikin obligacionesniycjllataj, jinakamalla honorniyojtaj atiyniyojtaj, chantaqa razonwantaj concienciawantaj dotasqa kasqankurayku, kawsaqe masipura jina, tukuy uj munakuyllapi kawsakunanku tian.", - "type": "NarrativeText" - }, - { - "element_id": "654791ed821f84e420d3742634a53e7c", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "spa", - "eng" - ] - }, - 
"text": "Quechua (Unified Quichua, old Hispanic orthography) Tucuy runacuna quishpirihuán huiñán, pactacunahuampes, pay pura, umahuán, ayahuán chay shucuna shina, chaymantami shuclla shina causangacuna.", - "type": "UncategorizedText" - }, - { - "element_id": "fed959145c5a6c3d8da63241f6de77c5", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ind", - "swa", - "tgl" - ] - }, - "text": "Quichua, Chimborazo Highland Tukuy runakunami maypipash kishpirishka, sumaykaypi(dignidad) paktapakta wacharin. Chay wawakunaka sumak yuyaykuna, tiksiyuyay (fundamental), huntami kan; chaymantami runapuraka shukllashina tukushpa, yanaparishpa kawsana kan.", - "type": "NarrativeText" - }, - { - "element_id": "8dc5fe5e7e5f4841e4a057d528ffb483", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ind", - "sqi" - ] - }, - "text": "Rarotongan Kua anau rangatira ia te tangata katoatoa ma te aiteite i te au tikaanga e te tu ngateitei tiratiratu. Kua ki ia ratou e te mero kimi ravenga e te akavangakau e kia akono tetai i tetai, i roto i te vaerua piri anga taeake.", - "type": "NarrativeText" - }, - { - "element_id": "f0f216272ee0f7e11e21eb4ca1752777", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ita", - "cat" - ] - }, - "text": "Romagnolo Tot j essèri umèn i nàs lébri e cumpagn in dignità e dirét. 
Lou i è dutid ad rasoun e ad cuscinza e i à da operè, ognun ti cunfrunt at ch'j ilt, sa sentimint ad fratelènza.", - "type": "NarrativeText" - }, - { - "element_id": "a84b6ff398b4f815054e7b47107ce163", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "slv", - "swe" - ] - }, - "text": "Romani, Balkan Savorre manuśa biandõn meste thaj barabar k-o demnipen aj k-e hakaja. Si len godi aj somzanipen thaj si len te trąden pen jekh karing o aver and-o vogi e phralimnasqoro.", - "type": "NarrativeText" - }, - { - "element_id": "dd72113ef6db4b69482adf28078a6090", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "slv" - ] - }, - "text": "Romani, Balkan (1) Sa e manušikane strukture bijandžona tromane thaj jekhutne ko digniteti thaj čapipa. Von si baxtarde em barvale gndaja thaj godžaja thaj trubun jekh avereja te kherjakeren ko vodži pralipaja.", - "type": "NarrativeText" - }, - { - "element_id": "d1d78e5ce9c3fe2071093b3f74f8f9b8", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ron" - ] - }, - "text": "Romanian (1953) Toate ființele umane se nasc libere și egale în demnitate și în drepturi. 
Ele sînt înzestrate cu rațiune și conștiință și trebuie să se comporte unele față de altele în spiritul fraternității.", - "type": "NarrativeText" - }, - { - "element_id": "ffd7f486f85cc12fffdee64c8dc1c47c", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ron" - ] - }, - "text": "Romanian (1993) Toate ființele umane se nasc libere și egale în demnitate și în drepturi. Ele sunt înzestrate cu rațiune și conștiință și trebuie să se comporte unele față de altele în spiritul fraternității.", - "type": "NarrativeText" - }, - { - "element_id": "81db31b50da57a040bad82d9af2297df", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ron" - ] - }, - "text": "Romanian (2006) Toate ființele umane se nasc libere și egale în demnitate și în drepturi. Ele sunt înzestrate cu rațiune și conștiință și trebuie să se comporte unele față de altele în spiritul fraternității.", - "type": "NarrativeText" - }, - { - "element_id": "cadc80db78bd586f5f18217272cfdb17", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "cat", - "ita" - ] - }, - "text": "Romansch Tuots umans naschan libers ed eguals in dignità e drets. 
Els sun dotats cun intellet e conscienza e dessan agir tanter per in uin spiert da fraternità.", - "type": "NarrativeText" - }, - { - "element_id": "4295c14118d555a1bd3be37701a4578e", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "deu", - "cat" - ] - }, - "text": "Romansch (Grischun) Tut ils umans naschan libers ed eguals en dignitad ed en dretgs. Els èn dotads cun raschun e conscienza e duain agir in vers l’auter en spiert da fraternitad.", - "type": "NarrativeText" - }, - { - "element_id": "d7c3646cc8bf5af91fa007bcdc86ad53", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "deu", - "cat", - "eng" - ] - }, - "text": "Romansch (Puter) Tuot ils umauns naschan libers ed eguels in dignited ed in drets. Els sun dotos cun radschun e conscienza e dessan agir ün invers l’oter in spiert da fraternited.", - "type": "NarrativeText" - }, - { - "element_id": "a0daace15fe9f49d73fcdd9e3b86f001", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "cat", - "deu", - "ita" - ] - }, - "text": "Romansch (Surmiran) Tot igls carstgangs neschan libers ed eguals an dignitad ed an dretgs. 
Els èn dotos cun raschung e schientscha e duessan ager l’egn vers l’oter an spiert da fraternitad.", - "type": "NarrativeText" - }, - { - "element_id": "57126ecde8022743581d3932507d8b63", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "deu", - "nld", - "fra", - "cat", - "ita" - ] - }, - "text": "Romansch (Sursilvan) Tut ils humans neschan libers ed eguals en dignitad ed en dretgs. Els ein dotai cun raschun e cunscienzia e duein agir in viers l’auter en spért da fraternitad.", - "type": "NarrativeText" - }, - { - "element_id": "82fb166f28096b77e6b865ce44135e16", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "cat", - "deu" - ] - }, - "text": "Romansch (Sutsilvan) Tut igls humans neschan libers ad eguals an dignitad ad an dretgs. Els en dotos cun raschùn a cunzienzia a den agir egn anviers l’oter an spiert da fraternitad.", - "type": "NarrativeText" - }, - { - "element_id": "53246b60d8dbe52f7f323cfe27507738", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "cat", - "ita" - ] - }, - "text": "Romansch (Vallader) Tuot ils umans naschan libers ed eguals in dignità ed in drets. 
Els sun dotats cun radschun e conscienza e dessan agir ün invers l’oter in ün spiert da fraternità.", - "type": "NarrativeText" - }, - { - "element_id": "97e1a02de327531973da6bb83feba879", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa" - ] - }, - "text": "Rundi Abantu bose bavuka bishira bakizana kandi bangana mu gateka no mu ngingo zibubahiriza. Bafise ubwenge n'umutima kandi bategerezwa kwubahana nk'abavandimwe.", - "type": "NarrativeText" - }, - { - "element_id": "7b1fe5da3cfa2322dd960a870a966d3a", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "rus" - ] - }, - "text": "Russian Все люди рождаются свободными и равными в своем достоинстве и правах. Они наделены разумом и совестью и должны поступать в отношении друг друга в духе братства.", - "type": "NarrativeText" - }, - { - "element_id": "6bf0412e57e78aa58fbf28eb5d55ecb1", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa" - ] - }, - "text": "Rwanda Abantu bose bavuka aliko bakwiye agaciro no kwubahwa kimwe. 
Bose bavukana ubwenge n'umutima, bagomba kugilirana kivandimwe.", - "type": "NarrativeText" - }, - { - "element_id": "48332b010fe58bc794e833308da30575", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "est", - "slv" - ] - }, - "text": "Saami, North Buot olbmot leat riegádan friddjan ja olmmošárvvu ja olmmošvuoigatvuođaid dáfus. Sii leat jierbmalaš olbmot geain lea oamedovdu ja sii gálggaše leat dego vieljačagat.", - "type": "NarrativeText" - }, - { - "element_id": "373656c2cab80370dd2768316c8a725e", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "tur" - ] - }, - "text": "Salar Heme kishler hür der, haysiyet ma haklarde adil der, mantik ma vicdan var, kardeshlikden davraneshge.", - "type": "UncategorizedText" - }, - { - "element_id": "3b9323b658ffe68e575944ba1afb73e3", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ita", - "swa", - "cat" - ] - }, - "text": "Samoan O tagata soifua uma ua saoloto lo latou fananau mai, ma e tutusa o latou tulaga aloaia faapea a latou aia tatau. 
Ua faaeeina atu i a latou le mafaufau lelei ma le loto fuatiaifo ma e tatau ona faatino le agaga faauso i le va o le tasi i le isi,", - "type": "NarrativeText" - }, - { - "element_id": "ddfa143fc42a89f1e4f7b99ce0028962", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "tgl", - "vie" - ] - }, - "text": "Sango Adü âzo kûê yamba, ngâ âla lîngbi terê na lêgë tî nëngö-terê na tî ângangü. Ala kûê awara ndarä na börö-li sï âla lîngbi tî dutï na âmbâ tî âla gï na lêngö söngö.", - "type": "NarrativeText" - }, - { - "element_id": "ba8456690a521bd0fb0bb757c188f302", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "hin" - ] - }, - "text": "Sanskrit सर्वे मानवाः स्वतन्त्राः समुत्पन्नाः वर्तन्ते अपि च, गौरवदृशा अधिकारदृशा च समानाः एव वर्तन्ते। एते सर्वे चेतना-तर्क-शक्तिभ्यां सुसम्पन्नाः सन्ति। अपि च, सर्वेऽपि बन्धुत्व-भावनया परस्परं व्यवहरन्तु।", - "type": "UncategorizedText" - }, - { - "element_id": "7013f596e8a99afdd7965ac753815ad9", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "nep" - ] - }, - "text": "Sanskrit (Grantha) 𑌸𑌰𑍍𑌵𑍇 𑌮𑌾𑌨𑌵𑌾𑌃 𑌸𑍍𑌵𑌤𑌨𑍍𑌤𑍍𑌰𑌾𑌃 𑌸𑌮𑍁𑌤𑍍𑌪𑌨𑍍𑌨𑌾𑌃 𑌵𑌰𑍍𑌤𑌨𑍍𑌤𑍇 𑌅𑌪𑌿 𑌚, 𑌗𑍌𑌰𑌵𑌦𑍃𑌶𑌾 𑌅𑌧𑌿𑌕𑌾𑌰𑌦𑍃𑌶𑌾 𑌚 𑌸𑌮𑌾𑌨𑌾𑌃 𑌏𑌵 𑌵𑌰𑍍𑌤𑌨𑍍𑌤𑍇। 𑌏𑌤𑍇 𑌸𑌰𑍍𑌵𑍇 𑌚𑍇𑌤𑌨𑌾-𑌤𑌰𑍍𑌕-𑌶𑌕𑍍𑌤𑌿𑌭𑍍𑌯𑌾𑌂 𑌸𑍁𑌸𑌮𑍍𑌪𑌨𑍍𑌨𑌾𑌃 𑌸𑌨𑍍𑌤𑌿। 𑌅𑌪𑌿 𑌚, 𑌸𑌰𑍍𑌵𑍇𑌽𑌪𑌿 𑌬𑌨𑍍𑌧𑍁𑌤𑍍𑌵-𑌭𑌾𑌵𑌨𑌯𑌾 𑌪𑌰𑌸𑍍𑌪𑌰𑌂 𑌵𑍍𑌯𑌵𑌹𑌰𑌨𑍍𑌤𑍁।", - "type": "NarrativeText" - }, - { - "element_id": 
"d9dd825f97644f9be308505d418e9ea9", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "por", - "ita" - ] - }, - "text": "Sãotomense Tudu nguê di mundu ca nancê livli e igual ni dignidade e ni dirêtu. Punda nen ca pensá e nen tê cunxensa, selá nen fé tudu cuá cu tençón de lumón.", - "type": "NarrativeText" - }, - { - "element_id": "ea94e46fedb24cbbc337bb5d30608ead", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "cat", - "por" - ] - }, - "text": "Sardinian, Logudorese Totu sos èsseres umanos naschint lìberos e eguales in dinnidade e in deretos. Issos tenent sa resone e sa cussèntzia e depent operare s'unu cun s'àteru cun ispìritu de fraternidade.", - "type": "NarrativeText" - }, - { - "element_id": "135f949e79e915feb11563f40072624d", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "deu" - ] - }, - "text": "Saxon, Low All de Minschen sünd frie un gliek an Wüürd un Rechten baren. 
Se hebbt Vernunft un een Geweten un se schüllt sik Bröder sien.", - "type": "NarrativeText" - }, - { - "element_id": "f82500b37b7d47fc3ce4735513427e14", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] - }, - "text": "Scots Aw human sowels is born free and equal in dignity and richts. They are tochered wi mense and conscience and shuld guide theirsels ane til ither in a speirit o britherheid.", - "type": "NarrativeText" - }, - { - "element_id": "49685f2659217462214b13c3594d1423", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "sqi", - "fin" - ] - }, - "text": "Secoya Si'apai aide'oyë kua'ye peoye kui'ne siayë'kë maka pa'iye kui'ne tutupaye koni, jaje kuasase'sëtepi kuaju'i'ne peoye ñese saiye pa'iji ko̱kaijë yekë paireje.", - "type": "NarrativeText" - }, - { - "element_id": "e0ca8f739a2a274e0e30bcd509b308e2", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "urd" - ] - }, - "text": "Seraiki سارے انسان ازادا تے حقوق تے عزت دے اعتبار نال ہکو ڄئے پیدا تھیندن ۔ قدرت ولوں انہاں کوں عقل تے سمجھ عطا تھیندی اے ۔ ہیں کیتے ہک ڋوجھے نال بھرپی داسلوک کرڻا چاہی دا اے ۔", - "type": "UncategorizedText" - }, - { - "element_id": "f855b701f2717951ee7041f505936e9e", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "mkd" - ] - }, - "text": "Serbian (Cyrillic) Сва људска бића рађају се слободна и једнака у достојанству и правима. Она су обдарена разумом и свешћу и треба једни према другима да поступају у духу братства.", - "type": "NarrativeText" - }, - { - "element_id": "1e1d32ffc1c937e2dc9b3b4e6b8a1453", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "hrv" - ] - }, - "text": "Serbian (Latin) Sva ljudska bića rađaju se slobodna i jednaka u dostojanstvu i pravima. Ona su obdarena razumom i svešću i treba jedni prema drugima da postupaju u duhu bratstva.", - "type": "NarrativeText" - }, - { - "element_id": "6a973a162a71cdf61973afc03d10bb08", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "som", - "afr", - "tgl" - ] - }, - "text": "Serer-Sine Wiin we naa ñoowaa na adna, den fop mbodu no ke war na oxnu refna na den a jega o ngalaat umpi yiif um, le mbarin o meƭtootaa baa mbaag o ñoow den fop no fog.", - "type": "NarrativeText" - }, - { - "element_id": "201296ccbaf34300a62d4a087915bf84", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "fra" - ] - }, - "text": "Seselwa Creole French Nou tou imen nou’n ne dan laliberte ek 
legalite, dan nou dignite ek nou bann drwa. Nou tou nou annan kapasite pou rezonnen, e fodre nou azir anver lezot avek en lespri fraternel.", - "type": "NarrativeText" - }, - { - "element_id": "f602d39c8cf6ba79e59adce09af30f26", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain" - }, - "text": "Shan ၵူၼ်းၵူႊၵေႃႉၼႆႉ ပဵၼ်ဢၼ်ၵိူတ်ႇမႃးလူၺ်ႈၵုင်ႇမုၼ်ဢၼ်လွတ်ႈလႅဝ်းၽဵင်ႇပဵင်းၵၼ် လႄႈ သုၼ်ႇလႆႈဢၼ် လွတ်ႈလႅဝ်းၽဵင်ႇ ပဵင်းၵၼ်။ ၶဝ်ၼႆႉ မီးၺၢၼ်ႇဢၼ်မေႃထတ်းသၢင် လႄႈ ၸႂ်ဢၼ်ႁူႉၸၵ်းၾိင်ႈတိုဝ်းၵမ် ၼၼ်ႉလႄႈ ထုၵ်ႇဝႆႉၸႂ်ပီႈဢွၵ်ႇ ၼွင်ႉၶႆႇၵၼ်သေ တိတ်းတေႃႇၵၼ်။", - "type": "UncategorizedText" - }, - { - "element_id": "b15fec7016d85b918b25ef8273accb9c", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ita", - "eng", - "por" - ] - }, - "text": "Sharanahua Nantifin naanno rasisin cainnifoquin. Tsoan mato iscahuatiroma cuscan, -Manfin uhuunnacoinquin. Ahuua tsacatama rarama shara ninonfo ishon. Nantififain aton mapo shinantirofoquin. Ato nomuranrin chaca iyamarain sharamainqui icashon. Ascanrifiantan nantifin manifoti yorahuan tanannon icashu.", - "type": "NarrativeText" - }, - { - "element_id": "20e37b3914fade183f3e76b200daccbd", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ind", - "sqi", - "tur", - "nor" - ] - }, - "text": "Shilluk Dhanhø bëne ba anywølø e path ki bäng, ge pär ki yij bëëdø geki dyërø. 
gïn-a dwaddi kiper gen yï gen da rumi ki bëëdø mø göög gen ki pyëw akyel ga nyimëgg.", - "type": "NarrativeText" - }, - { - "element_id": "9be888269d99ba5b9d4200b2a6d65346", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "cat", - "fra" - ] - }, - "text": "Shipibo-Conibo Jatíbi joninra huetsa jonibaon yoiai nincáresti iqui, jahueraquibi jaconmai iamaquin; jainoash jahuen queena jacon jahuéquibo ati jahuequescamabi iqui, tsonbira amayamatima iqui. Jaticashbira jascara aresti jacon shinanya iti jahuequescamabi iqui, jahuequescarainoash picota joni inonbi. Huestiora huestiorabora jahuéqui ati shinanya iqui; jainshon onanribique jahueratoqui jacon iqui jainoash jaconma iqui ishon. Ja copira huetsa jonibires inonbi non jato jaconharesti iqui, non huetsabi non acai quescaaquin.", - "type": "NarrativeText" - }, - { - "element_id": "6398e07c884e0212b6da4c16f73364fa", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa", - "hrv" - ] - }, - "text": "Shona Vanhu vese vanoberekwa vakasununguka uyewo vakaenzana pahunhu nekodzero dzavo. 
Vanhu vese vanechipo chokufunga nekuziva chakaipa nechakanaka saka vanofanira kubatana nomweya wohusahwira.", - "type": "NarrativeText" - }, - { - "element_id": "98765accca3aa276e32acc6ddb665f01", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "rus" - ] - }, - "text": "Shor Парчын кижи, по чарыққа туғчадып, тең, пош туғча. Кижилер сағыштығ, ақтығ туғчалар, кижилерге пашқа кижилербе арғыштаныштарға керек.", - "type": "NarrativeText" - }, - { - "element_id": "06b44e2713d2ab9cbfdbffecc788465a", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ind", - "swa" - ] - }, - "text": "Shuar Aents yajá nunkanam akínia asamtaish, metekrak ainiaji. Tumasha ni chichamenka tuke amiktin aíniawai. Ni iniakmamuri, ní chichamejaituke aniakmamsar chichakartin aíniawai. Tuma asamtai aents mash nekawar, penker metekrak, nuamtak wará warat shiir pujusarmi tusar aárma awai.", - "type": "NarrativeText" - }, - { - "element_id": "8e0cb1b65226a998ba0e2831e44dbe49", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "fin", - "swa", - "som" - ] - }, - "text": "Sidamo Manchi beetti kalaqamunni wolaphinoho. Ayirrinyunninna qoossotennino taaloho. 
Huwatanno tiiano kalaqamunni ba’raarinoha ikkasinni mittu wolu ledo rodiimmate ayyaaninni hee’ra noosi.", - "type": "NarrativeText" - }, - { - "element_id": "1129172b2baa1c40a3ab800d0d28f02b", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain" - }, - "text": "Sinhala සියලු මනුෂ්‍යයෝ නිදහස්ව උපත ලබා ඇත. ගරුත්වයෙන් හා අයිතිවාසිකම්වලින් සමාන වෙති. යුක්ති අයුක්ති පිළිබඳ හැඟීමෙන් හා හෘදය සාක්ෂියෙන් යුත් ඔවුන්, ඔවුනොවුන්ට සැළකිය යුත්තේ සහෝදරත්වය පිළිබඳ හැඟීමෙනි.", - "type": "NarrativeText" - }, - { - "element_id": "7f18ad35feab9b6f20b97d87856143c8", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "sqi", - "spa", - "swa" - ] - }, - "text": "Siona Sia'bai̱ aideo'yë goa'ye beoye gu̱i'ne sia'yë'quë maca bai'ye gu̱i'ne quëco baye co̱ni, ja̱je̱ goachase'sëte goa'ju̱i'ñe beoye ñese saiye bai'ji co̱caijë yequë bai̱reje.", - "type": "NarrativeText" - }, - { - "element_id": "c82f4633a9724d1de7dfe866d1429080", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "slk" - ] - }, - "text": "Slovak Všetci ľudia sa rodia slobodní a sebe rovní , čo sa týka ich dostojnosti a práv. 
Sú obdarení rozumom a majú navzájom jednať v bratskom duchu.", - "type": "NarrativeText" - }, - { - "element_id": "e35bcc47bb6a16df85ee45b9ba96ecf6", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "slv" - ] - }, - "text": "Slovenian Vsi ljudje se rodijo svobodni in imajo enako dostojanstvo in enake pravice. Obdarjeni so z razumom in vestjo in bi morali ravnati drug z drugim kakor bratje.", - "type": "NarrativeText" - }, - { - "element_id": "922133be59b9810ea4ae495847315aa2", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "som" - ] - }, - "text": "Somali Aadanaha dhammaantiis wuxuu dhashaa isagoo xor ah kana siman xagga sharafta iyo xuquuqada Waxaa Alle (Ilaah) siiyay aqoon iyo wacyi, waana in qof la arkaa qofka kale ula dhaqmaa si walaaltinimo ah.", - "type": "NarrativeText" - }, - { - "element_id": "5d86d8cbc9dda45558ccf60a3974e66a", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "som", - "ind", - "tur" - ] - }, - "text": "Soninke Haadama renme su saareyen ŋa an na du-kitten ña, an nta sere komaaxu, an do soron su yan yekka dorontaaxu do taqu. 
Haqilen, wa sere su, a do soro kuttu nan siri terene doome kappalengaaxu kanma.", - "type": "NarrativeText" - }, - { - "element_id": "2254a39b8eef4c825a973c26eb9364c9", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "pol", - "hrv" - ] - }, - "text": "Sorbian, Upper Wšitcy čłowjekojo su wot naroda swobodni a su jenacy po dostojnosći a prawach. Woni su z rozumom a swědomjom wobdarjeni a maja mjezsobu w duchu bratrowstwa wobchadźeć.", - "type": "NarrativeText" - }, - { - "element_id": "f6b37545577a2f9471636b40acbc5bf3", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "tgl", - "som", - "swa" - ] - }, - "text": "Sotho, Northern Batho ka moka ba belegwe ba lokologile le gona ba na le seriti sa go lekana le ditokelo. Ba filwe monagano le letswalo mme ba swanetše go swarana ka moya wa bana ba mpa.", - "type": "NarrativeText" - }, - { - "element_id": "4350ebc60622fe730a65cf3c01ab0656", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "eng", - "som", - "swa", - "ind" - ] - }, - "text": "Sotho, Southern Batho bohle ba tswetswe ba lokolohile mme ba lekana ka botho le ditokelo. 
Ba tswetswe le monahano le letswalo mme ba tlamehile ho phedisana le ba bang ka moya wa boena.", - "type": "NarrativeText" - }, - { - "element_id": "51733b425e93924dbea419a28d2ee3d2", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "tur" - ] - }, - "text": "South Azerbaijani Tüm insanlar hür döğarlar, hak ve onur bakımından eşit döğarlar, onlar akıl ve vicdana sahiptirler ve birbirlerine karşı kardeşlik ruhu içinde davranmalılar.", - "type": "UncategorizedText" - }, - { - "element_id": "7c2e8d871037d3d152d88dc5510cb236", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "spa" - ] - }, - "text": "Spanish Todos los seres humanos nacen libres e iguales en dignidad y derechos y, dotados como están de razón y conciencia, deben comportarse fraternalmente los unos con los otros.", - "type": "NarrativeText" - }, - { - "element_id": "816bdd2e0af6f8cc514fe60150f4714b", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "spa" - ] - }, - "text": "Spanish (resolution) Todos los seres humanos nacen libres e iguales en dignidad y derechos y, dotados como están de razón y conciencia, deben comportarse fraternalmente los unos con los otros.", - "type": "NarrativeText" - }, - { - "element_id": "52f8c540d96bbacf23226e192b534cb7", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - 
], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa", - "tgl" - ] - }, - "text": "Sukuma Banhu bose bakabyalagwa na wiyabi na bakabizaga na makujo na sekge jabo jilenganilile. Banhu bose bakabizaga na masala na buhabuji; hukuyomba balidakilwa gubi na witogwa gidi bana ba myaji umo.", - "type": "NarrativeText" - }, - { - "element_id": "a0574e22703ed205dfa2f5a01e2341bd", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ind" - ] - }, - "text": "Sunda Sakumna jalma gubrag ka alam dunya teh sifatna merdika jeung boga martabat katut hak-hak anu sarua . Maranehna dibere akal jeung hate nurani, campur-gaul jeung sasamana aya dina sumanget duduluran.", - "type": "NarrativeText" - }, - { - "element_id": "cf93d32f84284c9d205953f2720290ba", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "som", - "tur", - "ind" - ] - }, - "text": "Susu Adamadie birin barixinɛ e lan yɛtɛralui kui, yɛtɛ kolonyi nun yɛtɛ suxu kima. 
Fondoe nun faxamui na e bɛ boresuxu kima bariboreya fanyi kui.", - "type": "NarrativeText" - }, - { - "element_id": "5608031dfa2172453e04237b10f7a8b6", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa" - ] - }, - "text": "Swahili Watu wote wamezaliwa huru, hadhi na haki zao ni sawa. Wote wamejaliwa akili na dhamiri, hivyo yapasa watendeane kindugu.", - "type": "NarrativeText" - }, - { - "element_id": "7051a25e3c9317ae9774432a3874e4e3", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "tgl", - "swa" - ] - }, - "text": "Swati Bonkhe bantfu batalwa bakhululekile balingana ngalokufananako ngesitfunti nangemalungelo. Baphiwe ingcondvo nekucondza kanye nanembeza ngakoke bafanele batiphatse nekutsi baphatse nalabanye ngemoya webuzalwane.", - "type": "NarrativeText" - }, - { - "element_id": "962be1c35a09978ec0be3e93852b6925", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swe" - ] - }, - "text": "Swedish Alla människor äro födda fria och lika i värde och rättigheter. 
De äro utrustade med förnuft och samvete och böra handla gentemot varandra i en anda av broderskap.", - "type": "NarrativeText" - }, - { - "element_id": "99cf243f8a375bd33a573d5c26fcce02", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "tgl" - ] - }, - "text": "Tagalog Ang lahat ng tao'y isinilang na malaya at pantay-pantay sa karangalan at mga karapatan. Sila'y pinagkalooban ng katwiran at budhi at dapat magpalagayan ang isa't isa sa diwa ng pagkakapatiran.", - "type": "NarrativeText" - }, - { - "element_id": "1fbce46911c4817cf2f0bf0db19d2f32", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain" - }, - "text": "Tagalog (Tagalog) ᜀᜅ ᜎᜑᜆ᜔ ᜅ ᜆᜂᜌ᜔ ᜁᜐᜒᜈᜒᜎᜅ ᜈ ᜋᜎᜌ ᜀᜆ᜔ ᜉᜈ᜔ᜆᜌ᜔ ᜉᜈ᜔ᜆᜌ᜔ ᜐ ᜃᜇᜅᜎᜈ᜔ ᜀᜆ᜔ ᜋ᜔ᜄ ᜃᜇᜓᜉᜆᜈ᜔᜶ ᜐᜒᜎᜌ᜔ ᜉᜒᜈᜄ᜔ᜃᜎᜓᜊᜈ᜔ ᜅ ᜃᜆ᜔ᜏᜒᜇᜈ᜔ ᜀᜆ᜔ ᜊᜓᜇ᜔ᜑᜒ ᜀᜆ᜔ ᜇᜉᜆ᜔ ᜋᜄ᜔ᜉᜎᜄᜌᜈ᜔ ᜀᜅ ᜁᜐᜆ᜔ ᜁᜐ ᜐ ᜇᜒᜏ ᜅ ᜉᜄ᜔ᜃᜃᜉᜆᜒᜇᜈ᜔᜶", - "type": "NarrativeText" - }, - { - "element_id": "f80202b3162be68cd2957c5c564ddc03", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ita" - ] - }, - "text": "Tahitian E fanauhia te tā'āto'ara'a o te ta'ata-tupu ma te ti'amā e te ti'amanara'a 'aifaito. 
Ua 'ī te mana'o pa'ari e i te manava e ma te 'a'au taea'e 'oia ta ratou ha'a i rotopū ia ratou iho, e ti'a ai;", - "type": "NarrativeText" - }, - { - "element_id": "b5b3558a1982151293ab4f2c745e943b", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain" - }, - "text": "Tai Dam ꪹꪕꪸꪉ ꪀꪱ ꪋꪴ ꫛ ꪎꪲꪉ ꪮꪮꪀ ꪣꪱ ꪻꪠ ꪁꪷ ꪻꪬ ꪼꪒ ꪕꪳ ꪕꪱꪉ ꪀꪾꪚ ꪹꪋꪷꪉ ꪝꪸꪉ ꪕꪮꪥ ꪩꪾ ꫛ ꪶꪔꪙ ꪠꪴ - ꪋꪴ ꪬꪺ ꫛ ꪻꪠ ꪁꪷ ꪻꪬ ꪣꪲ ꪁꪫꪸꪙ ꪎꪱꪉ ꪶꪎꪣ ꪩꪺꪉ ꪹꪥꪸꪒ ꫛ ꪀꪾꪚ ꪹꪥꪸꪒ ꪻꪊ ꪚꪴꪙ ꪀꪾꪚ ꪼꪒ ꪹꪚꪷꪉ ꪒꪲ ꪀꪾꪚ ꪫꪸꪀ ꪭꪰꪀ ꪵꪝꪉ ꪹꪏꪉ ꪹꪭꪙ ꪒꪸꪫ.", - "type": "UncategorizedText" - }, - { - "element_id": "424be8d53e2447fd43a7df9c88610eb3", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "mkd", - "rus", - "bul" - ] - }, - "text": "Tajiki Тамоми одамон озод ва аз лиҳози шарафу ҳуқуқ ба ҳам баробар ба дунё меоянд. Онҳо соҳиби ақлу виҷдонанд ва бояд бо якдигар муносибати бародарона дошта бошанд.", - "type": "NarrativeText" - }, - { - "element_id": "30aa2c0edeca02853a028f15110a6827", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "tur" - ] - }, - "text": "Talysh Həmmə insonon bəştə ləyoğəti iyən həxonro ozod iyən bərobər movardə bedən. 
Çəvon şuur iyən vicdon hese, əve ki, deyəndı mınasibətədə bənə bıvə rəftor kardəninin.", - "type": "NarrativeText" - }, - { - "element_id": "615dde6386c8f1b795ccd07901216ce7", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "nep" - ] - }, - "text": "Tamang, Eastern म्होक्कोन (गोदोप) नोन म्हीम केपान्हापा हेन्छे नुन हाङपाङवा (स्वतन्त्र) याङवा हीन्ना । थे म्होक्कोनला (गोदोपला) च्योच्यो याङताम थेन महत्व मुला । थेनीकादेरी सेमबाङ (विचार शक्ति) देन थु-सेमसाङ मुबासे थेनीजुगुसे ह्राङन्हाङरी नुन थेत्माला सेमलेङमोग्याम्से (भवनाबाट) ग्ये लातोबान मुला ।", - "type": "UncategorizedText" - }, - { - "element_id": "f484ee723443631e755f61ec59737260", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "tur", - "nld" - ] - }, - "text": "Tamazight, Central Atlas Imdanen, akken ma llan ttlalen d ilelliyen msawan di lḥweṛma d yizerfan- ghur sen tamsakwit d lâquel u yessefk ad-tili tegmatt gar asen.", - "type": "NarrativeText" - }, - { - "element_id": "4fa699fe9b09ce455b4b7a0eceac23a4", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain" - }, - "text": "Tamazight, Central Atlas (Tifinagh) ⵉⵎⴷⴰⵏⴻⵏ, ⴰⴽⴽⴻⵏ ⵎⴰ ⵍⵍⴰⵏ ⵜⵜⵍⴰⵍⴻⵏ ⴷ ⵉⵍⴻⵍⵍⵉⵢⴻⵏ ⵎⵙⴰⵡⴰⵏ ⴷⵉ ⵍⵃⵡⴻⵕⵎⴰ ⴷ ⵢⵉⵣⴻⵔⴼⴰⵏ-ⵖⵓⵔ ⵙⴻⵏ ⵜⴰⵎⵙⴰⴽⵡⵉⵜ ⴷ ⵍⴰⵇⵓⴻⵍ ⵓ ⵢⴻⵙⵙⴻⴼⴽ ⴰⴷ-ⵜⵉⵍⵉ ⵜⴻⴳⵎⴰⵜⵜ ⴳⴰⵔ ⴰⵙⴻⵏ.", - "type": "UncategorizedText" - }, - { - "element_id": "c36059cd99076234366c10f07f278260", - "metadata": { - 
"data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain" - }, - "text": "Tamazight, Standard Morocan ⴰⵔ ⴷ ⵜⵜⵍⴰⵍⴰⵏ ⵎⵉⴷⴷⵏ ⴳⴰⵏ ⵉⵍⴻⵍⵍⵉⵜⵏ ⵎⴳⴰⴷⴷⴰⵏ ⵖ ⵡⴰⴷⴷⵓⵔ ⴷ ⵉⵣⵔⴼⴰⵏ, ⵢⵉⵍⵉ ⴰⴽⵯ ⴷⴰⵔⵙⵏ ⵓⵏⵍⵍⵉ ⴷ ⵓⴼⵔⴰⴽ, ⵉⵍⵍⴰ ⴼⵍⵍⴰ ⵙⵏ ⴰⴷ ⵜⵜⵎⵢⴰⵡⴰⵙⵏ ⵏⴳⵔⴰⵜⵙⵏ ⵙ ⵜⴰⴳⵎⴰⵜ.", - "type": "UncategorizedText" - }, - { - "element_id": "703b672337c499aededf6f6696d6522f", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "tam" - ] - }, - "text": "Tamil மனிதப் பிறிவியினர் சகலரும் சுதந்திரமாகவே பிறக்கின்றனர்; அவர்கள் மதிப்பிலும், உரிமைகளிலும் சமமானவர்கள், அவர்கள் நியாயத்தையும் மனச்சாட்சியையும் இயற்பண்பாகப் பெற்றவர்கள். அவர்கள் ஒருவருடனொருவர் சகோதர உணர்வுப் பாங்கில் நடந்துகொள்ளல் வேண்டும்.", - "type": "NarrativeText" - }, - { - "element_id": "cd3e1810510aee192781e40eae1b0ddc", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "tam" - ] - }, - "text": "Tamil (Sri Lanka) மனிதப் பிறிவியினர் சகலரும் சுதந்திரமாகவே பிறக்கின்றனர்; அவர்கள் மதிப்பிலும், உரிமைகளிலும் சமமானவர்கள், அவர்கள் நியாயத்தையும் மனச்சாட்சியையும் இயற்பண்பாகப் பெற்றவர்கள். 
அவர்கள் ஒருவருடனொருவர் சகோதர உணர்வுப் பாங்கில் நடந்துகொள்ளல் வேண்டும்.", - "type": "NarrativeText" - }, - { - "element_id": "9e55ede50aefd9018f64126e5d20a259", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "rus" - ] - }, - "text": "Tatar Барлык кешеләр дә азат һәм үз абруйлары һәм хокуклары ягыннан тиң булып туалар. Аларга акыл һәм вөҗдан бирелгән һәм бер-берсенә карата туганарча [туганнарча] мөнәсәбәттә булырга тиешләр.", - "type": "NarrativeText" - }, - { - "element_id": "ca7b2ef61ad3e52b7b7873feb9ba85c1", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "tel" - ] - }, - "text": "Telugu ప్రతిపత్తిస్వత్వముల విషయమున మానవులెల్లరును జన్మతః స్వతంత్రులును సమానులును నగుదురు. వారు వివేచన-అంతఃకరణ సంపన్నులగుటచే పరస్పరము భ్రాతృభావముతో వర్తింపవలయును.", - "type": "NarrativeText" - }, - { - "element_id": "8947e9ec5ba76eabce3e2d1e59437be7", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ces" - ] - }, - "text": "Tem Bánlʊrʊ́ʊ ɩrʊ́ báa weení na kezéńbíídi gɛ bɩka bɛdɛ́ɛ ɖɔɔzɩ́tɩ na yíkowá kɛgɛ́ɛ ɖéyí-ɖéyí gɛ. 
Bɔwɛná laakárɩ na ɩrʊ́tɩ bɩka bɩɩbɔ́ɔ́zɩ bɔcɔɔná ɖamá koobíre cɔwʊrɛ.", - "type": "NarrativeText" - }, - { - "element_id": "bbf40bd7dea22eb8d81c9b52157a9d78", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "fin", - "ind", - "por" - ] - }, - "text": "Tetun Ema hotu hotu moris hanesan ho dignidade ho direitu. Sira hotu iha hanoin, konsiensia n'e duni tenki hare malu hanesan espiritu maun-alin.", - "type": "NarrativeText" - }, - { - "element_id": "236b9fa6cdadaf909a113daabd6db99e", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ind", - "fin" - ] - }, - "text": "Tetun Dili Ema tomak moris hanesan, ema tomak hanesan, iha direitu hanesan. 
Ema tomak iha otak ho neon, hotu-hotu sei buka moris hanesan maun ho alin.", - "type": "NarrativeText" - }, - { - "element_id": "4db44873f89a9eec246259109ee43eb2", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] - }, - "text": "Thai", - "type": "Title" - }, - { - "element_id": "70fb4fd148b0adc870bad4cf3a004e9e", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "tha" - ] - }, - "text": "มนุษย์ทั้งหลายเกิดมามีอิสระและเสมอภาคกันในเกียรติศักด[เกียรติศักดิ์]และสิทธิ ต่างมีเหตุผลและมโนธรรม และควรปฏิบัติต่อกันด้วยเจตนารมณ์แห่งภราดรภาพ", - "type": "Title" - }, - { - "element_id": "841a49f9951dd2eb6b4d31049732b46a", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] - }, - "text": "Thai (2)", - "type": "Title" - }, - { - "element_id": "a4b136507e5ed6666129c7a44794fd18", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "tha" - ] - }, - "text": "มนุษย์ทั้งปวงเกิดมามีอิสระและเสมอภาคกันในศักดิ์ศรีและสิทธิ ต่างในตนมีเหตุผลและมโนธรรม และควรปฏิบัติต่อกันด้วยจิตวิญญาณแห่งภราดรภาพ", - "type": "Title" - }, - { - "element_id": "8f52798dd21c8472bda701088f7e82ca", - 
"metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa", - "ind" - ] - }, - "text": "Themne A kom aŋfəm akəpet bɛ ŋa athənʌnɛ yi rʌwankom. Ɔwa aŋ ba məmari məthənʌnɛ. Ɔwa aŋ ba məfith yi təchemp. Chiyaŋ, aŋ yi təkə gbasi aŋkos ŋaŋ mɔ kəpa ŋa təkom.", - "type": "NarrativeText" - }, - { - "element_id": "5d770ab20285b54fa9896cd9f26b3d43", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] - }, - "text": "Tibetan, Central", - "type": "Title" - }, - { - "element_id": "9ff7c25da02c27eefccdaca502af53c1", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain" - }, - "text": "འགྲོ་བ་མིའི་རིགས་རྒྱུད་ཡོངས་ལ་སྐྱེས་ཙམ་ཉིད་ནས་ཆེ་མཐོངས་དང༌། ཐོབ་ཐངགི་རང་དབང་འདྲ་མཉམ་དུ་ཡོད་ལ། ཁོང་ཚོར་རང་བྱུང་གི་བློ་རྩལ་དང་བསམ་ཚུལ་བཟང་པོ་འདོན་པའི་འོས་བབས་ཀྱང་ཡོད། དེ་བཞིན་ཕན་ཚུན་གཅིག་གིས་གཅིག་ལ་བུ་སྤུན་གྱི་འདུ་ཤེས་འཛིན་པའི་བྱ་སྤྱོད་ཀྱང་ལག་ལེན་བསྟར་དགོས་པ་ཡིན༎", - "type": "Title" - }, - { - "element_id": "8af88623529d7fac1f9e181cf1759b64", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "tur", - "vie", - "som", - "por" - ] - }, - "text": "Ticuna Ngẽxguma nabuxgu i duü̃xü̃gü rü guxü̃ma nawüxigu, rü tataxuma ya texé ya togüarü 
yexera ixĩsẽ. Rü guxü̃ma naxããẽgü rü ngẽmaca̱x rü name nixĩ na nügümaã namecümaxü̃ ĩ guxü̃ma ĩ duü̃xü̃gü.", - "type": "NarrativeText" - }, - { - "element_id": "3a1e54e52c1e8f2960b9f52ba81d5b61", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain" - }, - "text": "Tigrigna ብመንፅር ክብርን መሰልን ኩሎም ሰባት እንትውለዱ ነፃን ማዕሪን እዮም፡፡ ምስትውዓልን ሕልናን ዝተዓደሎም ብምዃኖም ንሕድሕዶም ብሕውነታዊ መንፈስ ክተሓላለዩ ኦለዎም፡፡", - "type": "UncategorizedText" - }, - { - "element_id": "de5f844ed144e72bc158df450d496c1f", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "tgl", - "ind" - ] - }, - "text": "Tiv I mar maor ken kpan ga, nan ngu a icivir man mbamkpeiyol cii. 
I na nan mhen man ishima i kaven kwagh; nahan gba keng u nana tema a orgen ken mtem u angbian a angbian.", - "type": "NarrativeText" - }, - { - "element_id": "dce66eb1491ee0e05782cd7b4060bdf1", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "som", - "cat" - ] - }, - "text": "Toba 'Enauac na naaxat shiỹaxauapi na mayipi huesochiguii qataq 'eeta'a't da l'amaqchic qataq da 'enec qataq ỹataqta ỹaỹate'n naua lataxaco qataq nua no'o'n nvilỹaxaco, qaq ỹoqo'oyi iuen da i 'oonolec ỹataqta itauan ichoxoden ca lỹa", - "type": "NarrativeText" - }, - { - "element_id": "d4b675c94f0bd52682c828f5060488a5", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "slv", - "hrv", - "est" - ] - }, - "text": "Tojolabal Spetsanal ja swinkil ja lu’um k’inali junxta wax jul schonjel, sok ja sijpanub’ali, ja yuj ojni b’ob’ sk’u’luk ja jas sk’ana-i ja b’as lekilali, ja yuj ja ay sk’ujoli sok ay spensari t’ilan oj yilsb’aje lek sok ja smoj jumasa.", - "type": "NarrativeText" - }, - { - "element_id": "baa6d468177ac269426d4a3d0b02bb93", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "tgl", - "ind" - ] - }, - "text": "Tok Pisin Yumi olgeta mama karim umi long stap fri na wankain long wei yumi lukim i gutpela na strepela tru. 
Uumi olgeta igat ting ting bilong wanem samting I rait na rong na mipela olgeta I mas mekim gutpela pasin long ol narapela long tingting bilong brata susa.", - "type": "NarrativeText" - }, - { - "element_id": "a264ccf464d28bf6d1ca7b13ae28ec33", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa", - "som", - "tgl" - ] - }, - "text": "Tonga Bantu boonse balazyalwa kabaangulukide alimwi kabeelene alimwi akwaanguluka kucita zyobayanda. Balazyalwa amaanu akuyeeya, aakusala alimwi beelede kulanga bambi mbuli banabokwabo.", - "type": "NarrativeText" - }, - { - "element_id": "11c1506a0e4eb0a3616787ebc32828da", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa", - "ind", - "fin" - ] - }, - "text": "Tongan Ko e kotoa ‘o ha’a tangata ‘oku fanau’i mai ‘oku tau’ataina pea tatau ‘i he ngeia mo e ngaahi totonu. 
Na’e fakanaunau’i kinautolu ‘aki ‘a e ‘atamai mo e konisenisi pea ‘oku totonu ke nau feohi ‘i he laumalie ‘o e nofo fakatautehina.", - "type": "NarrativeText" - }, - { - "element_id": "dab04d4ff36fa1a54202c63fe319d7f5", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa", - "tgl" - ] - }, - "text": "Totonac, Papantla Wakg lakch'ixkuwin talakgawan nak ka'unin niti ka'akgch'apawalinit nachuna wakg takg'alhi ixtamaxanatkan chu tu kaminini, je'e wanp'utun xlakata wakg talakpuwanan, talalakgk'atsan liwakg, talakask'ini xlakata wakg natalamakgtaya.", - "type": "NarrativeText" - }, - { - "element_id": "49ac7c418a1a33c64e2c3e228669acea", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa" - ] - }, - "text": "Tsonga (Mozambique) Vanhu hin'kwavu va psaliwili na va khululekìle, funthsi va fana hi lisima ni tinfaneno. Và psaliwili ni nyiko ya ku pimisa ni ku yehleketa; hi kolahu, va fanela ku hanya hi moya wa umbìlu ni unghani.", - "type": "NarrativeText" - }, - { - "element_id": "d67ec59f83da37904b828e20b79c6f98", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa", - "est" - ] - }, - "text": "Tsonga (Zimbabwe) Vanhu hinkwavo va tswariwa va tshunxekile naswona va ringanile eka tifanelo na xindzhuti. 
Va havaxerile miehleketo na tshiriti kumbe ku tiva xo biha ni xta kahle nakambe va fanele va kombana moya wa vukwavo.", - "type": "NarrativeText" - }, - { - "element_id": "7d60ead79ecb4818a244fb15daa7b691", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa", - "afr" - ] - }, - "text": "Tswana Batho botlhe ba tsetswe ba gololosegile le go lekalekana ka seriti le ditshwanelo. Ba abetswe go akanya le maikutlo, mme ba tshwanetse go direlana ka mowa wa bokaulengwe.", - "type": "NarrativeText" - }, - { - "element_id": "3ecfed863a5eed35ac7bcdc4f1ebcf6d", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "tur" - ] - }, - "text": "Turkish Bütün insanlar hür, haysiyet ve haklar bakımından eşit doğarlar. Akıl ve vicdana sahiptirler ve birbirlerine karşı kardeşlik zihniyeti ile hareket etmelidirler.", - "type": "NarrativeText" - }, - { - "element_id": "ec6b4429d4b16c9725f0f1420314a928", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "rus" - ] - }, - "text": "Turkmen (Cyrillic) Хемме адамлар өз мертебеси ве хукуклары боюнча дең ягдайда дүнйә инйәрлер. 
Олара аң хем выҗдан берлендир ве олар бир‐бирлери билен доганлык рухундакы гарайышда болмалыдырлар.", - "type": "NarrativeText" - }, - { - "element_id": "27683edb29bca811bea3008052c0fc9f", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "tur" - ] - }, - "text": "Turkmen (Latin) Adamlaryň hemmesi azat dogulýarlar we öz mertebesi hem‐de hukuklary boýunça ilkibaşdan deňdirler. Olara ozal‐başdan aň, ynsap berlendir we biri‐birine özara doganlyk ruhunda çemeleşmek olaryň ýaraşygydyr.", - "type": "NarrativeText" - }, - { - "element_id": "6b9f05c9e0fdf0e6de36b54f1c82f5d0", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "rus" - ] - }, - "text": "Tuva Бүгү кижилер хостуг база мөзүзү болгаш эргелери дең кылдыр төрүттүнер. Оларга угаансарыыл болгаш арын-нүүр бердинген болур болгаш олар бот-боттарынга акы-дуңмалышкы хамаарылганы көргүзер ужурлуг.", - "type": "NarrativeText" - }, - { - "element_id": "527f7d8b2d19b7c6c3f2fadc70ada262", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa", - "som" - ] - }, - "text": "Twi (Akuapem) Wɔawo adesamma nyinaa sɛ nnipa a wɔwɔ ahofadi. Wɔn nyinaa wɔ nidi ne kyɛfa koro. 
Wɔwɔ adwene ne ahonim, na ɛsɛ sɛ wobu wɔn ho wɔn ho sɛ anuanom.", - "type": "NarrativeText" - }, - { - "element_id": "aefbdde1da2ecc73208751b3c330bb3e", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa", - "som" - ] - }, - "text": "Twi (Asante) Nnipa nyinaa yɛ pɛ. Na wɔde adwene ne nyansa na abɔ obiara. Ɛno nti, ɛsɛ sɛ obiara dɔ ne yɔnko, bu ne yɔnko, di ne yɔnko ni.", - "type": "NarrativeText" - }, - { - "element_id": "4b0bd8eaae3f12feed9188c010027eb7", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ind", - "swa" - ] - }, - "text": "Tzeltal, Oxchuc Spisil winiketik te ya xbejk´ajik ta k´inalil ay jrerechotik, mayuk mach´a chukul ya xbejka, ya jnatik stojol te jpisiltik ay snopibal sok sbijil joltik, ja´ me k´ux ya kaibatik ta jujun tul.", - "type": "NarrativeText" - }, - { - "element_id": "3a1d8b7b6302ae4de3c1c05a5c4f8fc7", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "hrv", - "slv", - "ind" - ] - }, - "text": "Tzotzil (Chamula) Skotol vinik o ants ta spejel balumile k’olem x-hayan i ko’ol ta sch’ulal i sderechoetik i, skotol k’ux-elan oyike oy srasonik y slekilalik, sventa skuxijik leknóo ta ju jun ju ju vo.", - "type": "NarrativeText" - }, - { - "element_id": "9c8ce1a1d4b031909f2b8d5c31bc3084", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - 
], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa", - "som" - ] - }, - "text": "Uduk Aris ’kwaniny’ceshi ’baar mo dho’thkunu ’baḵany mo dhali mmomiiya ṯu’c imonṯal ’de/ mo dhali mii ma ḵar/e mo. Uni mini ta gi gwo mo dhali mii mo dhali uni mini mii ka karambuye/ ’kup̱ ki cin tiya mo e shi/in mo dhali mii kun tanu ikam mo.", - "type": "NarrativeText" - }, - { - "element_id": "35ad852b028b17863397cd23a741e776", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ukr" - ] - }, - "text": "Ukrainian Всі люди народжуються вільними і рівними у своїй гідності та правах. Вони наділені розумом і совістю і повинні діяти у відношенні один до одного в дусі братерства.", - "type": "NarrativeText" - }, - { - "element_id": "2da70f2c0e7850d3cb64606cb0479fc9", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa", - "hrv", - "afr" - ] - }, - "text": "Umbundu Omanu vosi vacitiwa valipwa kwenda valisoka kovina vyosikwenda komoko. 
Ovo vakwete esunga kwenda, kwenda olondunge kwenje ovo vatêla okuliteywila kuvamwe kwenda vakwavo vesokolwilo lyocisola.", - "type": "NarrativeText" - }, - { - "element_id": "64edd93c5fb9067ee95bf26fb4495608", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "hrv", - "fin" - ] - }, - "text": "Umbundu (011) Omanu vosi kilu lieve va citiwa lonjila yimosi leyovo limosi, lomoko yimosi kuenda unu umosi, kuenje momo vosi va kuete olondunge, va sesamela okulisumbila pokati ndavamanji.", - "type": "NarrativeText" - }, - { - "element_id": "1124c31d72fecef43b62a1802dfaa8b8", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa", - "ron", - "fin" - ] - }, - "text": "Urarina Ita rijiicha itolere cacha. 
Aihana jaun, ita belaain, naojoain neuruhine laurilaurichuru nenacaauru aina itolere cachaauru.", - "type": "NarrativeText" - }, - { - "element_id": "17e2b5b5c80c984c98843bbed39884c4", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "urd" - ] - }, - "text": "Urdu تمام انسان آزاد اور حقوق و عزت کے اعتبار سے برابر پیدا ہوئے ہیں۔ انہیں ضمیر اور عقل ودیعت ہوئی ہے۔ اس لئے انہیں ایک دوسرے کے ساتھ بھائی چارے کا سلوک کرنا چاہیئے۔", - "type": "UncategorizedText" - }, - { - "element_id": "64062747e4a49e81a0ff7fe76c935f92", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "urd" - ] - }, - "text": "Urdu (2) تمام انسان آزاد اور حقوق و عزت کے اعتبار سے برابر پیدا ہوئے ہیں۔ انہیں ضمیر اور عقل ودیعت ہوئی ہے۔ اس لیے انہیں ایک دوسرے کے ساتھ بھائی چارے کا سلوک کرنا چاہیے۔", - "type": "UncategorizedText" - }, - { - "element_id": "c0f369076ccc7b4f6949b46f78e9c721", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ara" - ] - }, - "text": "Uyghur (Arabic) ھەممە ئادەم زانىدىنلا ئەركىن، ئىززەت-ھۆرمەت ۋە ھوقۇقتا باپباراۋەر بولۇپ تۇغۇلغان. 
ئۇلار ئەقىلغە ۋە ۋىجدانغا ئىگە ھەمدە بىر-بىرىگە قېرىنداشلىق مۇناسىۋىتىگە خاس روھ بىلەن موئامىلە قىلىشى كېرەك.", - "type": "NarrativeText" - }, - { - "element_id": "c9695addaae400cf93180490aae4c5b8", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "nld", - "eng", - "deu", - "tur" - ] - }, - "text": "Uyghur (Latin) hemme adem zatidinla erkin, izzet-hörmet we hoquqta babbarawer bolup tughulghan. ular eqilghe we wijdan'gha ige hemde bir-birige qérindashliq munasiwitige xas roh bilen muamile qilishi kérek.", - "type": "NarrativeText" - }, - { - "element_id": "cf037543ae7e29089220134bd8d9fc80", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "mkd" - ] - }, - "text": "Uzbek, Northern (Cyrillic) Барча одамлар эркин, қадр‐қиммат ва ҳуқуқларда тенг бўлиб туғиладилар. Улар ақл ва виждон соҳибидирлар ва бир‐бирларига биродарларча муомала қилишлари зарур.", - "type": "NarrativeText" - }, - { - "element_id": "f96f007fae71f3dbb5cf107a67339f62", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "tur", - "ind" - ] - }, - "text": "Uzbek, Northern (Latin) Barcha odamlar erkin, qadr‐qimmat va huquqlarda teng boʻlib tugʻiladilar. 
Ular aql va vijdon sohibidirlar va bir‐birlariga birodarlarcha muomala qilishlari zarur.", - "type": "NarrativeText" - }, - { - "element_id": "4309a801882998d4a87ec4393c62eb5b", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain" - }, - "text": "Vai ꕉꕜꕮ ꔔꘋ ꖸ ꔰ ꗋꘋ ꕮꕨ ꔔꘋ ꖸ ꕎ ꕉꖸꕊ ꕴꖃ ꕃꔤꘂ ꗱ, ꕉꖷ ꗪꗡ ꔻꔤ ꗏꗒꗡ ꕎ ꗪ ꕉꖸꕊ ꖏꕎ. ꕉꕡ ꖏ ꗳꕮꕊ ꗏ ꕪ ꗓ ꕉꖷ ꕉꖸ ꕘꕞ ꗪ. ꖏꖷ ꕉꖸꔧ ꖏ ꖸ ꕚꕌꘂ ꗷꔤ ꕞ ꘃꖷ ꘉꔧ ꗠꖻ ꕞ ꖴꘋ ꔳꕩ ꕉꖸ ꗳ.", - "type": "NarrativeText" - }, - { - "element_id": "8874ff5275f95f22ade2d05b19b84596", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa" - ] - }, - "text": "Venda Vhathu vhoṱhe vha bebwa vhe na mbofholowo nahone vha tshi lingana siani ḽa tshirunzi na pfanelo. Vhathu vhoṱhe vho ṋewa mihumbulo na mvalo ngauralo vha tea u konou farana sa vhathu vhathihi.", - "type": "NarrativeText" - }, - { - "element_id": "1b4e3e7ad00ef96ec0938e98c22ac4d7", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa" - ] - }, - "text": "Venda Vhathu vhoṱhe vha bebwa vhe na mbofholowo nahone vha tshi lingana siani ḽa tshirunzi na pfanelo. 
Vhathu vhoṱhe vho ṋewa mihumbulo na mvalo ngauralo vha tea u konou farana sa vhathu vhathihi.", - "type": "NarrativeText" - }, - { - "element_id": "57f8d88a5300439c2e78d95d9954dd1b", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ita", - "por" - ] - }, - "text": "Venetian Tuti i èsari umani i nase łìbari e conpanji par dinjità e deriti. I ze dotài de rajon e de cosiensa e i ga da conportarse intrà de łori co spìrito de fradełi.", - "type": "NarrativeText" - }, - { - "element_id": "bde94a10001841ef9fad0f19311e6fa9", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "est", - "slv" - ] - }, - "text": "Veps Kaik mehed sünduba joudajin i kohtaižin, ühtejiččin ičeze arvokahudes i oiktusiš. Heile om anttud mel’ i huiktusentund i heile tariž kožuda toine toiženke kut vel’l’kundad.", - "type": "NarrativeText" - }, - { - "element_id": "c6836fc94a9a2261da5605eae88ea21f", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "vie" - ] - }, - "text": "Vietnamese Tất cả mọi người sinh ra đều được tự do và bình đẳng về nhân phẩm và quyền. 
Mọi con người đều được tạo hoá ban cho lý trí và lương tâm và cần phải đối xử với nhau trong tình bằng hữu.", - "type": "NarrativeText" - }, - { - "element_id": "485844252e980bfa24ace2ee02a33f60", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] - }, - "text": "Vietnamese (Han nom)", - "type": "Title" - }, - { - "element_id": "294055dfb0c1131395070d727e81fde6", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "kor", - "zho" - ] - }, - "text": "畢哿每𠊛生𠚢調得自由吧平等𧗱人品吧權。每𡥵𠊛調得造化頒朱理智吧良心吧勤沛對處𢭲膮𥪝情朋友。", - "type": "Title" - }, - { - "element_id": "4ab64de143568003ad62ca2cf3c8cda3", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "som", - "swa" - ] - }, - "text": "Waama Yiriba na bà sikindo dare bà mɛɛri, da seena yirimma mii bà ta da i nɛki bà tɔɔba.", - "type": "NarrativeText" - }, - { - "element_id": "88700f6c9f719c0f7ad537b0fe24d46d", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "fra" - ] - }, - "text": "Walloon Tos lès-omes vinèt-st-å monde lîbes, èt so-l'minme pîd po çou qu'ènn'èst d'leu dignité èt d'leus dreûts. 
I n'sont nin foû rêzon èt-z-ont-i leû consyince po zèls, çou qu'èlzès deût miner a s'kidûre onk' po l'ôte tot come dès frés.", - "type": "NarrativeText" - }, - { - "element_id": "4cc64fb61f5a74a71a90733404e04f4d", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa", - "tgl" - ] - }, - "text": "Waorani Tomamo waomo ekame wee anamay inani tomemo kewengi beye tomamo neemompa noynga impa aye anobay impa wadani inanite wakeki beye angampa.", - "type": "NarrativeText" - }, - { - "element_id": "800591b9820914f6f735094c30dd8c74", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "tgl" - ] - }, - "text": "Waray-Waray Nga an ngatanan nga mga tawo, nahimugso talwas ug katpong ha ira dignidad ug katdungan. 
Hira natawo dinhi ha tuna mayda konsensya ug isip ug kaangayan gud la nga an ira pagtagad ha tagsatagsa sugad hin magburugto.", - "type": "NarrativeText" - }, - { - "element_id": "25c9bb862536e9e520792ea8724608de", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa" - ] - }, - "text": "Wayuu Naa wayuukana jemeishi süpüla taashi süma wanawa sülu'u nakua'ipa, aka müin yaa epijainjana sünain anajiranawaa a'in nama napüshi.", - "type": "NarrativeText" - }, - { - "element_id": "b4265fbb8924aeeb84569e7b2e4e3197", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "cym" - ] - }, - "text": "Welsh Genir pawb yn rhydd ac yn gydradd â’i gilydd mewn urddas a hawliau. Fe’u cynysgaeddir â rheswm a chydwybod, a dylai pawb ymddwyn y naill at y llall mewn ysbryd cymodlon.", - "type": "NarrativeText" - }, - { - "element_id": "8799ac3c8264dbd02b24e5484e28ea2d", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "ind", - "som" - ] - }, - "text": "Wolof Doomi aadama yépp danuy juddu, yam ci tawfeex ci sag ak sañ-sañ. 
Nekk na it ku xam dëgg te ànd na ak xelam, te war naa jëflante ak nawleen, te teg ko ci wàllu mbokk.", - "type": "NarrativeText" - }, - { - "element_id": "e341d3f3abdd74df58471e96e0695a4b", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa", - "ind" - ] - }, - "text": "Xhosa Bonke abantu bazalwa bekhululekile belingana ngesidima nangokweemfanelo. Bonke abantu banesiphiwo sesazela nesizathu sokwenza isenzo ongathanda ukuba senziwe kumzalwane wakho.", - "type": "NarrativeText" - }, - { - "element_id": "64fadcc858963f78ddee929df92dd08e", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "hrv", - "tur", - "ind" - ] - }, - "text": "Yagua Ne sarupay nijyami cumudeju darvantyamuy javatyasjiu. Jachipiyadati mirvara samirva, mirvamuy ne samirva. Ramunltiy sarivichanichara samirvariy jityunu vichavay.", - "type": "NarrativeText" - }, - { - "element_id": "b1da3b28878be3ee9c9045f0c9223c84", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "rus" - ] - }, - "text": "Yakut Дьон барыта бэйэ суолтатыгар уонна быраабыгар тэҥ буолан төрүүллэр. 
Кинилэр бары өркөн өйдөөх, суобастаах буолан төрүүллэр, уонна бэйэ бэйэлэригэр тылга кииринигэс быһыылара доҕордоһуу тыыннаах буолуохтаах.", - "type": "NarrativeText" - }, - { - "element_id": "53f4d4779755796c4b53e9945f211ced", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "spa", - "eng" - ] - }, - "text": "Yaneshaʼ Allohueney ñeñtey arromñatey att̃o ye'ñalletyesa arr patsro e'ñe att̃ecma cohuen yesherb̃a'yen. Ñam̃a yechyen allpon derechos att̃och e'ñech cohueno'tsa'yeney arr patsro. Ñam̃a allohuen att̃ecma yechyen alloch yoct̃ape' chyen cohuen ñam̃a yeñotyen yeyoc̈hro ñeñt ̃e'ne pocte' enten acheñenesha' ñam̃a ñeñt ̃ama pocteye' enteneto. Yeñoteñ añ poctetsa e'ñe yemo'nasheñ yep̃annena ama't ora allohuen allpon acheñenesha' ñeñt ̃añe patsro'tsa'yeney.", - "type": "NarrativeText" - }, - { - "element_id": "1484d1c7c562268257922f9f0522d183", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "sqi" - ] - }, - "text": "Yanomamö Kõmi thë pë rë përiprawë rë piyëkëi, he usukuwë thë pë keprou ai thë ã rëamaihã no ã heparohowë, totihitawë thë pë riã rẽ thaiwehei hami, thë pë puhi tao kãi përihiwëha, thë pë puhi kãi katehewëha hawë kama thë pë mashi shĩro përihimopë.", - "type": "NarrativeText" - }, - { - "element_id": "f0d349994b009758a91544c530c1cdd1", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - 
"swa" - ] - }, - "text": "Yao Wandu wosope akasapagwa ni ufulu ni uchimbichimbi wakulandana. Asapagwa ni lunda, niwakupakombola ganisya, m'yoyo kukusosekwa kuti mundu jwalijose am'woneje mundu jwimwe mpela mlongomjakwe.", - "type": "NarrativeText" - }, - { - "element_id": "6e2772e24613e482dbe3ec725643ea7a", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "tgl", - "cym" - ] - }, - "text": "Yapese Gubine gidii mani gargeleg nga faileng nibapuf mattʼawen nge rogon. Bay laniyan nipii e nam, ere ngauda ted mattʼaawen e chaa niba chugur ngoded nimod walag dad.", - "type": "NarrativeText" - }, - { - "element_id": "dd0ec8c9f26cfc60d56857c55e78705f", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "heb" - ] - }, - "text": "Yiddish, Eastern יעדער מענטש װערט געבױרן פֿרײַ און גלײַך אין כּבֿוד און רעכט. יעדער װערט באַשאָנקן מיט פֿאַרשטאַנד און געװיסן; יעדער זאָל זיך פֿירן מיט אַ צװײטן אין אַ געמיט פֿון ברודערשאַפֿט.", - "type": "NarrativeText" - }, - { - "element_id": "33533cecec6c5714680925cbc9d55bb1", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "vie" - ] - }, - "text": "Yoruba Gbogbo ènìyàn ni a bí ní òmìnira; iyì àti ẹ̀tọ́ kọ̀ọ̀kan sì dọ́gba. 
Wọ́n ní ẹ̀bùn ti làákàyè àti ti ẹ̀rí‐ọkàn, ó sì yẹ kí wọn ó máa hùwà sí ara wọn gẹ́gẹ́ bí ọmọ ìyá.", - "type": "NarrativeText" - }, - { - "element_id": "263ae4a61b51cca14085f92de5a8cfa5", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "rus" - ] - }, - "text": "Yukaghir, Northern Көдэҥ тэн - ньидитэ бандьэ параԝааньэрэҥ тудэ чуҥдэн ньилдьилэк эннулҥинь-мэдьуолнуни. Көдэҥ энмун чундэ мэ льэй, таатльэр лукундьии ньинэмдьийилпэ дитэ эннуйуол-мораԝньэҥи.", - "type": "NarrativeText" - }, - { - "element_id": "5d93ef013b9a5b75709657ba49153ed9", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa" - ] - }, - "text": "Záparo Kawiriaja kayapuina ichaukui ta nuka pucha panicha kupanimajicha cha nuka nishima ikicha kiniana panicha tamanuka kanata ikimajicha.", - "type": "UncategorizedText" - }, - { - "element_id": "7d1772a7cde57cf4033fb6ecd38d611b", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "cym", - "eng", - "ind", - "ita" - ] - }, - "text": "Zapotec, Güilá Ra'ta ra bu:unny ra:aaly liebr cëhnn te'bloh deree'ch cëhnn dignidaa. 
Ra:alyne:erih gahll ri:e:eny cëhnn saalyb, chiru' na:a pahr ga:annza'crih loh sa'rih.", - "type": "NarrativeText" - }, - { - "element_id": "efe41cb241efcd0774cf2f9bd328b778", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "afr", - "slk", - "cat", - "ind", - "som" - ] - }, - "text": "Zapotec, Miahuatlán Diti mien ndied xa yent kuan nkie xa nak rieti xa diba xa rola.", - "type": "NarrativeText" - }, - { - "element_id": "b1bf6eb1c62dbb55df63d0dcd8595d2a", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "som" - ] - }, - "text": "Zarma Fayanka kulu no si adamayzey nda care game ra i burcintara nda i alhakey cediraw kayandiyaŋ fondo ra da i na i hay. I gonda lakkal, nda laasaabu, kaŋ ga naŋ i ma baafunay ɲayzetaray haali ra.", - "type": "NarrativeText" - }, - { - "element_id": "1c60afd50d137ef937a1579b3ead3a1d", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "nld", - "ind", - "eng" - ] - }, - "text": "Zhuang, Yongbei Boux boux ma daengz lajmbwn couh miz cwyouz, cinhyenz caeuq genzli bouxboux Bingzdaengj. 
gyoengq vunz miz lijsing caeuq liengzsim, wngdang daih gyoengq de lumj beixnuengx ityiengh.", - "type": "NarrativeText" - }, - { - "element_id": "ba76674b7cb1ad279f93a7027afa6ced", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "swa", - "ind", - "tgl" - ] - }, - "text": "Zulu Bonke abantu bazalwa bekhululekile belingana ngesithunzi nangamalungelo. Bahlanganiswe wumcabango nangunembeza futhi kufanele baphathane ngomoya wobunye.", - "type": "NarrativeText" - }, - { - "element_id": "3e64942cf704c27071b652681de4124b", - "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/../example-docs/language-docs/UDHR_first_article_all.txt" - }, - "filetype": "text/plain", - "languages": [ - "eng" - ] - }, - "text": "------", - "type": "UncategorizedText" - } -] \ No newline at end of file diff --git a/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf.json b/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.json similarity index 61% rename from test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf.json rename to test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.json index 974b573d98..55c78ea5c0 100644 --- a/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf.json +++ 
b/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.json @@ -1,502 +1,552 @@ [ { + "type": "Title", "element_id": "1e41f20785644cdea2f017cfb67bb359", + "text": "Core Skills for Biomedical Data Scientists", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "Core Skills for Biomedical Data Scientists", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "c915a2a57c901810a698491ca2393669", + "text": "Maryam Zaringhalam, PhD, AAAS Science & Technology Policy Fellow", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "Maryam Zaringhalam, PhD, AAAS Science & Technology Policy Fellow", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "b24c3f8d268b2f834a00966d8faef975", + "text": "Lisa Federer, MLIS, Data Science Training Coordinator", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + 
"filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "Lisa Federer, MLIS, Data Science Training Coordinator", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "fcff333f886b39cee0a7084a9ff9204d", + "text": "Michael F. Huerta, PhD, Associate Director of NLM for Program Development and NLM Coordinator of Data Science and Open Science Initiatives", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "Michael F. 
Huerta, PhD, Associate Director of NLM for Program Development and NLM Coordinator of Data Science and Open Science Initiatives", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "1b86fad341db35208d75a543bcf819ae", + "text": "Executive Summary", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "Executive Summary", - "type": "Title" + ] + } + } }, { + "type": "NarrativeText", "element_id": "fee71d4f7ef7a5f253a44f6df648d12a", + "text": "This report provides recommendations for a minimal set of core skills for biomedical data scientists based on analysis that draws on opinions of data scientists, curricula for existing biomedical data science programs, and requirements for biomedical data science jobs. 
Suggested high-level core skills include:", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "This report provides recommendations for a minimal set of core skills for biomedical data scientists based on analysis that draws on opinions of data scientists, curricula for existing biomedical data science programs, and requirements for biomedical data science jobs. Suggested high-level core skills include:", - "type": "NarrativeText" + ] + } + } }, { + "type": "ListItem", "element_id": "caa3c2eba90fedb7c8923ae8cd8de961", + "text": "1. General biomedical subject matter knowledge: biomedical data scientists should have a general working knowledge of the principles of biology, bioinformatics, and basic clinical science;", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "1. 
General biomedical subject matter knowledge: biomedical data scientists should have a general working knowledge of the principles of biology, bioinformatics, and basic clinical science;", - "type": "ListItem" + ] + } + } }, { + "type": "ListItem", "element_id": "f2ecce91323f01402aa06611385262ef", + "text": "2. Programming language expertise: biomedical data scientists should be fluent in at", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "2. Programming language expertise: biomedical data scientists should be fluent in at", - "type": "ListItem" + ] + } + } }, { + "type": "Title", "element_id": "6b6645c408540ac22f4fd4be06820271", + "text": "least one programming language (typically R and/or Python);", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "least one programming language (typically R and/or Python);", - "type": "Title" + ] + } + } }, { + "type": "ListItem", "element_id": 
"eb7d0e257b4f0178cdce46cd57b33dc2", + "text": "3. Predictive analytics, modeling, and machine learning: while a range of statistical methods may be useful, predictive analytics, modeling, and machine learning emerged as especially important skills in biomedical data science;", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "3. Predictive analytics, modeling, and machine learning: while a range of statistical methods may be useful, predictive analytics, modeling, and machine learning emerged as especially important skills in biomedical data science;", - "type": "ListItem" + ] + } + } }, { + "type": "ListItem", "element_id": "1a174e104169cb41cf69393a9cdc0872", + "text": "4. 
Team science and scientific communication: \u201csoft\u201d skills, like the ability to work well on teams and communicate effectively in both verbal and written venues, may be as important as the more technical skills typically associated with data science.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "4. Team science and scientific communication: “soft” skills, like the ability to work well on teams and communicate effectively in both verbal and written venues, may be as important as the more technical skills typically associated with data science.", - "type": "ListItem" + ] + } + } }, { + "type": "ListItem", "element_id": "57314792fe7a371933b2116bc8242622", + "text": "5. 
Responsible data stewardship: a successful data scientist must be able to implement best practices for data management and stewardship, as well as conduct research in an ethical manner that maintains data security and privacy.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "5. Responsible data stewardship: a successful data scientist must be able to implement best practices for data management and stewardship, as well as conduct research in an ethical manner that maintains data security and privacy.", - "type": "ListItem" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "26c704088ae82677871f8f8abd78459c", + "text": "The report further details specific skills and expertise relevant to biomedical data scientists.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "The report further details specific skills and expertise relevant to biomedical data scientists.", - "type": "UncategorizedText" 
+ ] + } + } }, { + "type": "Title", "element_id": "ce78773a1364f6be706f3a5b11d50179", + "text": "Motivation", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "Motivation", - "type": "Title" + ] + } + } }, { + "type": "NarrativeText", "element_id": "690b79e1d449426afb07ed40866a6bb6", + "text": "Training a biomedical data science (BDS) workforce is a central theme in NLM\u2019s Strategic Plan for the coming decade. That commitment is echoed in the NIH-wide Big Data to Knowledge (BD2K) initiative, which invested $61 million between FY2014 and FY2017 in training programs for the development and use of biomedical big data science methods and tools. In line with", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "Training a biomedical data science (BDS) workforce is a central theme in NLM’s Strategic Plan for the coming decade. 
That commitment is echoed in the NIH-wide Big Data to Knowledge (BD2K) initiative, which invested $61 million between FY2014 and FY2017 in training programs for the development and use of biomedical big data science methods and tools. In line with", - "type": "NarrativeText" + ] + } + } }, { + "type": "Header", "element_id": "b810a8721369c3551c942aab9011b7d1", + "text": "Core Skills for Biomedical Data Scientists _____________________________________________________________________________________________", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "Core Skills for Biomedical Data Scientists _____________________________________________________________________________________________", - "type": "Header" + ] + } + } }, { + "type": "NarrativeText", "element_id": "c8fdefac1ae82fa42caeceff04853415", + "text": "this commitment, a recent report to the NLM Director recommended working across NIH to identify and develop core skills required of a biomedical data scientist to consistency across the cohort of NIH-trained data scientists. 
This report provides a set of recommended core skills based on analysis of current BD2K-funded training programs, biomedical data science job ads, and practicing members of the current data science workforce.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "this commitment, a recent report to the NLM Director recommended working across NIH to identify and develop core skills required of a biomedical data scientist to consistency across the cohort of NIH-trained data scientists. 
This report provides a set of recommended core skills based on analysis of current BD2K-funded training programs, biomedical data science job ads, and practicing members of the current data science workforce.", - "type": "NarrativeText" + ] + } + } }, { + "type": "Title", "element_id": "b5b7392d0a946f5016bfa8ad0c248a9b", + "text": "Methodology", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "Methodology", - "type": "Title" + ] + } + } }, { + "type": "NarrativeText", "element_id": "d9d8e38d221ae621c0ddbcabaa4a28b4", + "text": "The Workforce Excellence team took a three-pronged approach to identifying core skills required of a biomedical data scientist (BDS), drawing from:", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "The Workforce Excellence team took a three-pronged approach to identifying core skills required of a biomedical data scientist (BDS), drawing from:", - "type": "NarrativeText" + ] + } 
+ } }, { + "type": "NarrativeText", "element_id": "ba70aa3bc3ad0dec6a62939c94c5a20c", + "text": "a) Responses to a 2017 Kaggle1 survey2 of over 16,000 self-identified data scientists working across many industries. Analysis of the Kaggle survey responses from the current data science workforce provided insights into the current generation of data scientists, including how they were trained and what programming and analysis skills they use.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "a) Responses to a 2017 Kaggle1 survey2 of over 16,000 self-identified data scientists working across many industries. Analysis of the Kaggle survey responses from the current data science workforce provided insights into the current generation of data scientists, including how they were trained and what programming and analysis skills they use.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "24724b1f0d20a6575f2782fd525c562f", + "text": "b) Data science skills taught in BD2K-funded training programs. A qualitative content analysis was applied to the descriptions of required courses offered under the 12 BD2K-funded training programs. Each course was coded using qualitative data analysis software, with each skill that was present in the description counted once. 
The coding schema of data science-related skills was inductively developed and was organized into four major categories: (1) statistics and math skills; (2) computer science; (3) subject knowledge; (4) general skills, like communication and teamwork. The coding schema is detailed in Appendix A.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "b) Data science skills taught in BD2K-funded training programs. A qualitative content analysis was applied to the descriptions of required courses offered under the 12 BD2K-funded training programs. Each course was coded using qualitative data analysis software, with each skill that was present in the description counted once. The coding schema of data science-related skills was inductively developed and was organized into four major categories: (1) statistics and math skills; (2) computer science; (3) subject knowledge; (4) general skills, like communication and teamwork. The coding schema is detailed in Appendix A.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "5e6c73154a1e5f74780c69afbc9bc084", + "text": "c) Desired skills identified from data science-related job ads. 59 job ads from government (8.5%), academia (42.4%), industry (33.9%), and the nonprofit sector (15.3%) were sampled from websites like Glassdoor, Linkedin, and Ziprecruiter. 
The content analysis methodology and coding schema utilized in analyzing the training programs were applied to the job descriptions. Because many job ads mentioned the same skill more than once, each occurrence of the skill was coded, therefore weighting important skills that were mentioned multiple times in a single ad.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "c) Desired skills identified from data science-related job ads. 59 job ads from government (8.5%), academia (42.4%), industry (33.9%), and the nonprofit sector (15.3%) were sampled from websites like Glassdoor, Linkedin, and Ziprecruiter. The content analysis methodology and coding schema utilized in analyzing the training programs were applied to the job descriptions. Because many job ads mentioned the same skill more than once, each occurrence of the skill was coded, therefore weighting important skills that were mentioned multiple times in a single ad.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "249f6c76b2c99dadbefb8b8811b0d4cd", + "text": "Analysis of the above data provided insights into the current state of biomedical data science training, as well as a view into data science-related skills likely to be needed to prepare the BDS workforce to succeed in the future. 
Together, these analyses informed recommendations for core skills necessary for a competitive biomedical data scientist.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "Analysis of the above data provided insights into the current state of biomedical data science training, as well as a view into data science-related skills likely to be needed to prepare the BDS workforce to succeed in the future. Together, these analyses informed recommendations for core skills necessary for a competitive biomedical data scientist.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "6543ce4e447de8fb3db98ceb06a50c28", + "text": "1 Kaggle is an online community for data scientists, serving as a platform for collaboration, competition, and learning: http://kaggle.com 2 In August 2017, Kaggle conducted an industry-wide survey to gain a clearer picture of the state of data science and machine learning. A standard set of questions were asked of all respondents, with more specific questions related to work for employed data scientists and questions related to learning for data scientists in training. 
Methodology and results: https://www.kaggle.com/kaggle/kaggle-survey-2017", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "1 Kaggle is an online community for data scientists, serving as a platform for collaboration, competition, and learning: http://kaggle.com 2 In August 2017, Kaggle conducted an industry-wide survey to gain a clearer picture of the state of data science and machine learning. A standard set of questions were asked of all respondents, with more specific questions related to work for employed data scientists and questions related to learning for data scientists in training. 
Methodology and results: https://www.kaggle.com/kaggle/kaggle-survey-2017", - "type": "NarrativeText" + ] + } + } }, { + "type": "Footer", "element_id": "1a6ff96d028f18331a9d9c9748b49321", + "text": "2", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "2", - "type": "Footer" + ] + } + } } ] \ No newline at end of file diff --git a/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/azure/IRS-form-1987.pdf.json b/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/azure/IRS-form-1987.json similarity index 100% rename from test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/azure/IRS-form-1987.pdf.json rename to test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/azure/IRS-form-1987.json diff --git a/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/biomed-api/65/11/main.PMC6312790.pdf.json b/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/biomed-api/65/11/main.PMC6312790.json similarity index 55% rename from test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/biomed-api/65/11/main.PMC6312790.pdf.json rename to test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/biomed-api/65/11/main.PMC6312790.json index 5af52f7379..5c4a058d2a 100644 --- a/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/biomed-api/65/11/main.PMC6312790.pdf.json +++ 
b/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/biomed-api/65/11/main.PMC6312790.json @@ -1,3827 +1,4195 @@ [ { + "type": "Header", "element_id": "782cf07be8b3ab8f05188e479edb7f61", + "text": "Data in Brief 22 (2019) 451\u2013457", "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", "languages": [ "eng" ], "links": [ { - "start_index": 0, - "text": "DatainBrief22 ( 2019 ) 451 –", - "url": "https://doi.org/10.1016/j.dib.2018.11.134" + "text": "DatainBrief22 ( 2019 ) 451 \u2013", + "url": "https://doi.org/10.1016/j.dib.2018.11.134", + "start_index": 0 } ], - "page_number": 1 - }, - "text": "Data in Brief 22 (2019) 451–457", - "type": "Header" - }, - { - "element_id": "c3e4ba0411db419c34f27ae55762b1c1", - "metadata": { + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "c3e4ba0411db419c34f27ae55762b1c1", + "text": "Contents lists available at ScienceDirect", + "metadata": { "languages": [ "eng" ], "links": [ { - "start_index": -1, "text": "", - "url": "www.sciencedirect.com/science/journal/23523409" + "url": "www.sciencedirect.com/science/journal/23523409", + "start_index": -1 } ], - "page_number": 1 - }, - "text": "Contents lists available at ScienceDirect", - "type": "NarrativeText" - }, - { - "element_id": "a983d2e46059a8605ebb1077994e6fa3", - "metadata": { + "page_number": 1, + "filetype": "application/pdf", 
"data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "Data in Brief", - "type": "Title" + ] + } + } }, { - "element_id": "354cd2b49c1a201a5e91177a17f9b2a3", + "type": "Title", + "element_id": "a983d2e46059a8605ebb1077994e6fa3", + "text": "Data in Brief", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", + ] + } + } + }, + { + "type": "Title", + "element_id": "354cd2b49c1a201a5e91177a17f9b2a3", + "text": "journal homepage: www.elsevier.com/locate/dib", + "metadata": { "languages": [ "eng" ], "links": [ { - "start_index": 18, "text": "www . elsevier . 
com / locate /", - "url": "www.elsevier.com/locate/dib" + "url": "www.elsevier.com/locate/dib", + "start_index": 18 } ], - "page_number": 1 - }, - "text": "journal homepage: www.elsevier.com/locate/dib", - "type": "Title" + "page_number": 1, + "filetype": "application/pdf", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } }, { + "type": "Title", "element_id": "c1c1eeb08eba1d16beccf2034fc87bc8", + "text": "Data Article", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "Data Article", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "f1b37e8056f39eb82901f43f4fe0a239", + "text": "Data on environmental sustainable corrosion inhibitor for stainless steel in aggressive environment", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "Data on environmental sustainable corrosion inhibitor for stainless steel 
in aggressive environment", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "1a4fcf35fcd5d2be9f843f0fb93f3d3e", + "text": "Omotayo Sanni n, Abimbola Patricia I. Popoola", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "Omotayo Sanni n, Abimbola Patricia I. Popoola", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "418af174cd1457a5db9b88c3c4a33ce3", + "text": "Department of Chemical, Metallurgical and Materials Engineering, Tshwane University of Technology, Pretoria, South Africa", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "Department of Chemical, Metallurgical and Materials Engineering, Tshwane University of Technology, Pretoria, South Africa", - "type": "UncategorizedText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "698747e1178c3e0ec15b2eb293e58565", + "text": "a r t i c l e i n f o", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + 
"path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "a r t i c l e i n f o", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "19e64efbeabe463d8d8a6f577d4c6be7", + "text": "a b s t r a c t", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "a b s t r a c t", - "type": "NarrativeText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "8e23ddc47eb2833b067fe61c9c413955", + "text": "Article history: Received 31 August 2018 Received in revised form 17 November 2018 Accepted 27 November 2018 Available online 30 November 2018", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "Article history: Received 31 August 2018 
Received in revised form 17 November 2018 Accepted 27 November 2018 Available online 30 November 2018", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "2b0eb4fb8b32b5944bcf711f448ef19a", + "text": "Keywords: Corrosion Stainless steel Inhibitor Sulphuric acid", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "Keywords: Corrosion Stainless steel Inhibitor Sulphuric acid", - "type": "Title" + ] + } + } }, { + "type": "NarrativeText", "element_id": "8930d3f5d6929e72cbe35523538fc807", + "text": "This data article contains data related to the research article entitled \u201cenhanced corrosion resistance of stainless steel Type 316 in sulphuric acid solution using eco-friendly waste product\u201d (Sanni et al., 2018). In this data article, a comprehensive effect of waste product and optimized process parameter of the inhibitor in 0.5 M H2SO4 solution was presented using weight loss and potentiody- the inhibitor namic polarization techniques. The presence of (egg shell powder) in\ufb02uenced corrosion resistance of stainless steel. 
Inhibition ef\ufb01ciency value of 94.74% was recorded as a result of inhibition of the steel by the ionized molecules of the inhibiting compound of the egg shell powder in\ufb02uencing the redox mechan- ism reactions responsible for corrosion and surface deterioration.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "This data article contains data related to the research article entitled “enhanced corrosion resistance of stainless steel Type 316 in sulphuric acid solution using eco-friendly waste product” (Sanni et al., 2018). In this data article, a comprehensive effect of waste product and optimized process parameter of the inhibitor in 0.5 M H2SO4 solution was presented using weight loss and potentiody- the inhibitor namic polarization techniques. The presence of (egg shell powder) influenced corrosion resistance of stainless steel. Inhibition efficiency value of 94.74% was recorded as a result of inhibition of the steel by the ionized molecules of the inhibiting compound of the egg shell powder influencing the redox mechan- ism reactions responsible for corrosion and surface deterioration.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "aa8a123d8b7bf47bd15c389a6685d405", + "text": "& 2018 Published by Elsevier Inc. 
This is an open access article under the CC BY-NC-ND license (http://creativecommons.org/licenses/by-nc-nd/4.0/).", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "& 2018 Published by Elsevier Inc. This is an open access article under the CC BY-NC-ND license (http://creativecommons.org/licenses/by-nc-nd/4.0/).", - "type": "NarrativeText" + ] + } + } }, { + "type": "Title", "element_id": "0757794849e2cca941b30b4e1e82cd4b", + "text": "Speci\ufb01cation table", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "Specification table", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "bab7909d0362404432e0cc4f90049b3a", + "text": "Subject area More speci\ufb01c subject area Surface science and engineering Type of data", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "Subject area More specific subject area Surface science and engineering Type of data", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "227863137634b2d549494fac759af715", + "text": "Materials engineering", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "Materials engineering", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "3f88b0d8c42101ff25aeb213051cf81f", + "text": "Table and \ufb01gure", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "Table and figure", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": 
"b6664d832b0c853cff911e63ce738371", + "text": "n Corresponding author. tayo.sanni@yahoo.com; SanniO@tut.ac.za", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "n Corresponding author. tayo.sanni@yahoo.com; SanniO@tut.ac.za", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "9b655d4b82dc2b1d75b9c21c7b0fc7f8", + "text": "E-mail address: tayo.sanni@yahoo.com (O. Sanni).", "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", "languages": [ "eng" ], "links": [ { - "start_index": 16, "text": "tayo . sanni @ yahoo . com", - "url": "mailto:tayo.sanni@yahoo.com" + "url": "mailto:tayo.sanni@yahoo.com", + "start_index": 16 } ], - "page_number": 1 - }, - "text": "E-mail address: tayo.sanni@yahoo.com (O. 
Sanni).", - "type": "Title" - }, - { - "element_id": "96e9fe2b2775d750918a6f92f0d3ad95", - "metadata": { + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "96e9fe2b2775d750918a6f92f0d3ad95", + "text": "https://doi.org/10.1016/j.dib.2018.11.134 2352-3409/& 2018 Published by Elsevier Inc. This is an open access article under the CC BY-NC-ND license (http://creativecommons.org/licenses/by-nc-nd/4.0/).", + "metadata": { "languages": [ "eng" ], "links": [ { - "start_index": 0, "text": "https", - "url": "https://doi.org/10.1016/j.dib.2018.11.134" + "url": "https://doi.org/10.1016/j.dib.2018.11.134", + "start_index": 0 }, { - "start_index": 0, "text": "https", - "url": "https://doi.org/10.1016/j.dib.2018.11.134" + "url": "https://doi.org/10.1016/j.dib.2018.11.134", + "start_index": 0 }, { - "start_index": 0, "text": "https :// doi . org / 10 . 1016 / j . dib . 2018 . 11 .", - "url": "https://doi.org/10.1016/j.dib.2018.11.134" + "url": "https://doi.org/10.1016/j.dib.2018.11.134", + "start_index": 0 } ], - "page_number": 1 - }, - "text": "https://doi.org/10.1016/j.dib.2018.11.134 2352-3409/& 2018 Published by Elsevier Inc. 
This is an open access article under the CC BY-NC-ND license (http://creativecommons.org/licenses/by-nc-nd/4.0/).", - "type": "NarrativeText" + "page_number": 1, + "filetype": "application/pdf", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } }, { + "type": "UncategorizedText", "element_id": "757b62f5ce8ceee7150b7ce16ea16c93", + "text": "452", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "452", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Header", "element_id": "fb14c87d94f1676010e46b776d688612", + "text": "O. Sanni, A.P.I. Popoola / Data in Brief 22 (2019) 451\u2013457", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "O. Sanni, A.P.I. 
Popoola / Data in Brief 22 (2019) 451–457", - "type": "Header" + ] + } + } }, { + "type": "NarrativeText", "element_id": "72155e648a45896b081904929fc91cc6", + "text": "How data were acquired", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "How data were acquired", - "type": "NarrativeText" + ] + } + } }, { + "type": "Title", "element_id": "a577cc1dfaa481812a9cff86c06d9835", + "text": "Data format Experimental factors", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "Data format Experimental factors", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "9b9d298aef0e8b4a83bca09152a07128", + "text": "Experimental features Data source location", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "Experimental features Data source location", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "6f850529ced475435229c193a8ee7938", + "text": "Accessibility Related research article", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "Accessibility Related research article", - "type": "Title" + ] + } + } }, { + "type": "NarrativeText", "element_id": "c1c91f3ea75c102b6ed42b94530cbafe", + "text": "The cleaned and weighed specimen was suspended in beakers con- taining 0.5 M H2SO4 solution of different concentrations of egg shell powder. The pre-weighed stainless steel samples were retrieved from the test solutions after every 24 h, cleaned appropriately, dried and reweighed. Raw, analyzed The difference between the weight at a given time and the initial weight of the specimen was taken as the weight loss, which was used to calculate the corrosion rate and inhibition ef\ufb01ciency. Inhibitor concentration, exposure time Department of Chemical, Metallurgical and Materials Engineering, Tshwane University of Technology, Pretoria, South Africa Data are available within this article O. Sanni, A. P. I. Popoola, and O. S. I. 
Fayomi, Enhanced corrosion resistance of stainless steel type 316 in sulphuric acid solution using eco-friendly waste product, Results in Physics, 9 (2018) 225\u2013230.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "The cleaned and weighed specimen was suspended in beakers con- taining 0.5 M H2SO4 solution of different concentrations of egg shell powder. The pre-weighed stainless steel samples were retrieved from the test solutions after every 24 h, cleaned appropriately, dried and reweighed. Raw, analyzed The difference between the weight at a given time and the initial weight of the specimen was taken as the weight loss, which was used to calculate the corrosion rate and inhibition efficiency. Inhibitor concentration, exposure time Department of Chemical, Metallurgical and Materials Engineering, Tshwane University of Technology, Pretoria, South Africa Data are available within this article O. Sanni, A. P. I. Popoola, and O. S. I. 
Fayomi, Enhanced corrosion resistance of stainless steel type 316 in sulphuric acid solution using eco-friendly waste product, Results in Physics, 9 (2018) 225–230.", - "type": "NarrativeText" + ] + } + } }, { + "type": "Title", "element_id": "a5dd74871d789945bd8a9c352d4817fb", + "text": "Value of the data", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "Value of the data", - "type": "Title" + ] + } + } }, { + "type": "NarrativeText", "element_id": "9bed69cd8287b2725bd845ca61ebb3cd", + "text": "(cid:1) Data presented here provide optimum conditions of waste material as inhibitor for stainless steel Type 316 in 0.5 M H2SO4 medium. The given data describe the inhibitive performance of eco-friendly egg shell powder on austenitic stainless steel Type 316 corrosion in sulphuric acid environment.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "(cid:1) Data presented here provide optimum conditions of waste material as inhibitor for stainless steel Type 316 in 0.5 M H2SO4 medium. 
The given data describe the inhibitive performance of eco-friendly egg shell powder on austenitic stainless steel Type 316 corrosion in sulphuric acid environment.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "2ac3a042a8c89fd81718d1fda7ae576b", + "text": "(cid:1) The data obtained for the inhibition of waste product (egg shell powder) on stainless steel Type 316 can be used as basis in determining the inhibitive performance of the same inhibitor in other environments.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "(cid:1) The data obtained for the inhibition of waste product (egg shell powder) on stainless steel Type 316 can be used as basis in determining the inhibitive performance of the same inhibitor in other environments.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "4962aa80bf0712155f4b781df06b4f1a", + "text": "(cid:1) The data can be used to examine the relationship between the process variable as it affect the", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - 
"languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "(cid:1) The data can be used to examine the relationship between the process variable as it affect the", - "type": "NarrativeText" + ] + } + } }, { + "type": "Title", "element_id": "3b419c2d586d0eaf047f939c9e41b30f", + "text": "nature of inhibition of metals.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "nature of inhibition of metals.", - "type": "Title" + ] + } + } }, { + "type": "ListItem", "element_id": "f742be9cbb2d0697a88a9f749bf3185c", + "text": "1. Data", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "1. Data", - "type": "ListItem" + ] + } + } }, { + "type": "NarrativeText", "element_id": "732dc7fa0795c651041c10c2d318a8ae", + "text": "The results of the experiment are presented in this session. The results obtained from weight loss method for stainless steel Type 316 immersed in 0.5 M H2SO4 solution in the absence and presence of different concentrations of egg shell powder (ES) are presented in Figs. 
1\u20133 respectively. It can be seen clearly from these Figures that the ef\ufb01ciency of egg shell powder increase with the inhibitor con- centration, The increase in its ef\ufb01ciency could be as a result of increase in the constituent molecule", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "The results of the experiment are presented in this session. The results obtained from weight loss method for stainless steel Type 316 immersed in 0.5 M H2SO4 solution in the absence and presence of different concentrations of egg shell powder (ES) are presented in Figs. 1–3 respectively. 
It can be seen clearly from these Figures that the efficiency of egg shell powder increase with the inhibitor con- centration, The increase in its efficiency could be as a result of increase in the constituent molecule", - "type": "NarrativeText" + ] + } + } }, { + "type": "Title", "element_id": "f3a850e6bd8c0557408ad59167f5461e", + "text": ") g m", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": ") g m", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "3cb4a395dab98ecdc71ad325411cf150", + "text": "(", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "(", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "2b2ff92863f302ae630dc410b945333a", + "text": "s s o", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, 
"permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "s s o", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "0da3f5fd0fd07fc182d371760d9da3c0", + "text": "l", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "l", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "f929b69f05a08ec2b940c9b531740326", + "text": "t h g e W", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "t h g e W", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "f0fbafddf553bdea61ac009ad080f1bc", + "text": "i", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "i", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "2b3d55b9ce69bcd15d67071cf0d11814", + "text": "30", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "30", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "9673d82062115826d94732418d566ba2", + "text": "20", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "20", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "b0304d4851460afe7c95d41feb260093", + "text": "10g 8g 6g 4g 2g Control", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + 
"filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "10g 8g 6g 4g 2g Control", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "7f646e71d7bc0398e9917eec2c29b9ef", + "text": "10", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "10", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "12a72cb263173964cf41736e5d3707b2", + "text": "48", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "48", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "673fe20c15c1210d134b56828c5a8216", 
+ "text": "96", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "96", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "c552ee9963f985fd6b3498e2cf2c6230", + "text": "144", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "144", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "16e471ece5a33bfb80b79b89aed6c731", + "text": "192", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "192", - "type": "UncategorizedText" + ] + } + } }, { + 
"type": "Title", "element_id": "829e97853a2843ff6a8f1cfd3a6c74db", + "text": "Exposure Time (Hours)", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "Exposure Time (Hours)", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "b6f97c1cdf0e9f1abebac577d4cf4b2a", + "text": "Fig. 1. Weight loss versus exposure time for stainless steel presence of ES.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "Fig. 1. 
Weight loss versus exposure time for stainless steel presence of ES.", - "type": "UncategorizedText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "09a5818257d4c970dc57191f38e1c1b0", + "text": "immersed in 0.5 M H2SO4 solution in the absence and", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "immersed in 0.5 M H2SO4 solution in the absence and", - "type": "NarrativeText" + ] + } + } }, { + "type": "Header", "element_id": "828e27fb21b2ca5e25ebdc5f0693ed7d", + "text": "O. Sanni, A.P.I. Popoola / Data in Brief 22 (2019) 451\u2013457", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "O. Sanni, A.P.I. 
Popoola / Data in Brief 22 (2019) 451–457", - "type": "Header" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "81cbf4e59dfe4444a94794a547e9063c", + "text": "2.7", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "2.7", - "type": "UncategorizedText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "f1b0da24500b1f98c9debd55a2482b7f", + "text": ") r a e y / m m", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": ") r a e y / m m", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "9efd31c777cb3a30d24545982e71644e", + "text": "( e t a r n o s o r r o C", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "( e t a r n o s o r r o C", - "type": "NarrativeText" + ] + } + } }, { + "type": "Title", "element_id": "a535b571914bff036ee8d7b941a9e14c", + "text": "i", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "i", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "6445348d57f8715d980bbf266f6cc4b3", + "text": "1.8", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "1.8", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "dff5188d0e9db124ca45b71e4123404f", + "text": "0.9", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "0.9", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "2e8665917db0a5ca56fee4e99f113c05", + "text": "10g 8g 6g 4g 2g Control", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "10g 8g 6g 4g 2g Control", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "9b38508e1e3ddd8056482945216e1a28", + "text": "24", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "24", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "4638ab00ad25c2044ed18ba57b766d7d", + "text": "48", "metadata": { + "languages": [ + "eng" + ], + 
"page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "48", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "252b95fc79d992358f5e7e4423febe14", + "text": "72", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "72", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "963002fc37d4568e01e1361b0f053b53", + "text": "96", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "96", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": 
"292f8084988c4f4000fcd5bd2205c36a", + "text": "120", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "120", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "5c317addf6947e11fba4c4f584f095c1", + "text": "144", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "144", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "95649afacb76442d050ed4534b80c4cc", + "text": "168", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "168", - "type": 
"UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "dad2b03f8f9d732efa19ab6a421e971d", + "text": "192", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "192", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "8f500e748d82811ccbb3b715e1932be6", + "text": "Exposure time", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "Exposure time", - "type": "Title" + ] + } + } }, { + "type": "NarrativeText", "element_id": "03f95f2413bbe205cdc6975b1b98ecbe", + "text": "Fig. 2. 
Corrosion rate versus exposure time for stainless steel immersed in 0.5 M H2SO4 solution in the absence and presence of ES.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "Fig. 2. Corrosion rate versus exposure time for stainless steel immersed in 0.5 M H2SO4 solution in the absence and presence of ES.", - "type": "NarrativeText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "3c32d78e905ba61d1ae55e0b2ebd5946", + "text": "100", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "100", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "78e1f4ff627e16f8159327279bdfcce0", + "text": "90", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "90", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "748c1e92cccf809f3776382792e93895", + "text": ")", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": ")", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "feccbab23ec407ef6cc22348a78244d3", + "text": "%", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "%", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "03ac492dccd89cf13a9d40ada0e543e1", + "text": "(", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "(", - "type": "UncategorizedText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "2a02254b1d03abddd3537dc16c56a6fb", + "text": "y c n e c i f f", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "y c n e c i f f", - "type": "NarrativeText" + ] + } + } }, { + "type": "Title", "element_id": "67504491ab6c6c3603a75d246c50f54d", + "text": "i", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "i", - "type": "Title" + ] + } + } }, { + "type": "NarrativeText", "element_id": "6a2c597e6f8cfa0954a022873f9dcf6f", + "text": "E n o i t i b h n I", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + 
"filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "E n o i t i b h n I", - "type": "NarrativeText" + ] + } + } }, { + "type": "Title", "element_id": "f84aae3bf521f4166f63e87b5ef4f035", + "text": "i", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "i", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "b76e96beb931beaef6e3660f5d415c3d", + "text": "80", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "80", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "0309a67bcfd5df32328af8c537c708e6", + "text": "70", 
"metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "70", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "33add4c83afdffa0745406aea3c75b49", + "text": "60", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "60", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "e180205da17abbe716978d5c4aa4dd03", + "text": "50", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "50", - "type": "UncategorizedText" + ] + } + } }, { + "type": 
"UncategorizedText", "element_id": "18f47de0e9dbec383a50a39027960bc6", + "text": "40", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "40", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "89ac5d03f7c6d4fa92bda587be577ab8", + "text": "30", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "30", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "93a1080514211ba59a1850d5600c261c", + "text": "2g 4g 6g 8g 10g", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - 
"page_number": 3 - }, - "text": "2g 4g 6g 8g 10g", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "a66d7b20adfb12a1efd70da1d5b65375", + "text": "20", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "20", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "82bf75b4e447974f22e48c9a450c45d5", + "text": "10", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "10", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "d460a5ac4c345529812f84dabf681d9f", + "text": "0", "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 3 - }, - "text": "0", - "type": "UncategorizedText" + "page_number": 3, + "filetype": "application/pdf", + "data_source": { + 
"record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } }, { + "type": "UncategorizedText", "element_id": "a6282e95f41f8cb5061e0618a02dc09a", + "text": "20", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "20", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "44e027245f6667d8282ec4728ad9c2dd", + "text": "40", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "40", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "935862a8bb1abed65afc07fc8d1da166", + "text": "60", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 
33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "60", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "fada482b9f03a3eda9be2ad92169bc9a", + "text": "80", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "80", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "3179f53a093e5bb8064b777a8125c88e", + "text": "100", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "100", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "2053a3a5b1e12481504583f7f72979ff", + "text": "120", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "120", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "b81dbb6336d2b992478316f8514e94b6", + "text": "140", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "140", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "d4eb5e157598e6fa21a6b5b4254e9b5e", + "text": "160", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "160", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "f082a93dce4872ddd5ecc97c3a9341fb", + "text": "180", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + 
"filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "180", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "4c19db10f909537bf29da9829ab6f81b", + "text": "Exposure Time (Hours)", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "Exposure Time (Hours)", - "type": "Title" + ] + } + } }, { + "type": "NarrativeText", "element_id": "c566a56fa9e9ad6b97408310e357b079", + "text": "Fig. 3. 
Inhibition ef\ufb01ciency versus exposure time for stainless steel immersed in 0.5 M H2SO4 solution in the presence of ES.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "Fig. 3. Inhibition efficiency versus exposure time for stainless steel immersed in 0.5 M H2SO4 solution in the presence of ES.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "21233d8e249dd8180c7f2c99a468f337", + "text": "number of inhibitor adsorbed on the surface of stainless steel at higher concentration, in order for the active sites of the stainless steel to be protected with the inhibitor molecules. Cathodic and anodic polarized potential are measured in the presence and absence of ES. Fig. 4 shows the cathodic and anodic polarization curves for stainless steel in 0.5 M H2SO4 solution at different ES concentrations. The electrochemical variables such as polarization resistance (PR), corrosion potential (Ecorr), cor- rosion current (icorr), anodic Tafel constant (ba), cathodic Tafel constant (bc) and corrosion rate (mm/ year) values are presented in Table 1. From the polarization curves and electrochemical parameter, icorr value decreased with the addition of inhibitor in 0.5 M H2SO4. Conversely, the icorr further decrease with an increase in inhibitor concentration indicating that the inhibition effects increase with an increase in the egg shell concentration. 
The process of egg shell inhibition could be attributed to the formation of egg shell powder adsorbed on stainless steel surface protecting corrosion of stainless steel in H2SO4 medium. The likely mechanism is the egg shell adsorption on stainless steel surface through the heteroatoms electron pair and the conjugated systems in egg shell molecular structure as shown in Fig. 1. When the concentration of inhibitor was increased from 2 to 10 g, the corrosion rate values drastically decreased this result show that waste egg shell powder is an effective corrosion inhibitor for stainless steel in H2SO4 solution. The shift in corrosion potential of stainless steel from Tafel curves and electrochemical data indicate that the inhibitor is a mixed-type corrosion inhibitor.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "number of inhibitor adsorbed on the surface of stainless steel at higher concentration, in order for the active sites of the stainless steel to be protected with the inhibitor molecules. Cathodic and anodic polarized potential are measured in the presence and absence of ES. Fig. 4 shows the cathodic and anodic polarization curves for stainless steel in 0.5 M H2SO4 solution at different ES concentrations. The electrochemical variables such as polarization resistance (PR), corrosion potential (Ecorr), cor- rosion current (icorr), anodic Tafel constant (ba), cathodic Tafel constant (bc) and corrosion rate (mm/ year) values are presented in Table 1. 
From the polarization curves and electrochemical parameter, icorr value decreased with the addition of inhibitor in 0.5 M H2SO4. Conversely, the icorr further decrease with an increase in inhibitor concentration indicating that the inhibition effects increase with an increase in the egg shell concentration. The process of egg shell inhibition could be attributed to the formation of egg shell powder adsorbed on stainless steel surface protecting corrosion of stainless steel in H2SO4 medium. The likely mechanism is the egg shell adsorption on stainless steel surface through the heteroatoms electron pair and the conjugated systems in egg shell molecular structure as shown in Fig. 1. When the concentration of inhibitor was increased from 2 to 10 g, the corrosion rate values drastically decreased this result show that waste egg shell powder is an effective corrosion inhibitor for stainless steel in H2SO4 solution. The shift in corrosion potential of stainless steel from Tafel curves and electrochemical data indicate that the inhibitor is a mixed-type corrosion inhibitor.", - "type": "NarrativeText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "443e25a2b54b8b2a43f8029e07f784b3", + "text": "453", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "453", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "33b112b0d8640ab4f13b22a2ee714086", + "text": "454", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": 
"application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "454", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Header", "element_id": "e87ca7b3cd075aaa0de8030768aca87c", + "text": "O. Sanni, A.P.I. Popoola / Data in Brief 22 (2019) 451\u2013457", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "O. Sanni, A.P.I. Popoola / Data in Brief 22 (2019) 451–457", - "type": "Header" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "fd8a0feb5e755ece5d9abceb844649ff", + "text": "Fig. 4. 
Anodic and cathodic polarization curve of stainless steel in 0.5 M H2SO4 solution in the presence and absence of ES.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "Fig. 4. Anodic and cathodic polarization curve of stainless steel in 0.5 M H2SO4 solution in the presence and absence of ES.", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "598ed0a58406fc921332297f345b177a", + "text": "Table 1 Potentiodynamic polarization data for stainless steel in the absence and presence of ES in 0.5 M H2SO4 solution.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "Table 1 Potentiodynamic polarization data for stainless steel in the absence and presence of ES in 0.5 M H2SO4 solution.", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "9620a738189422654c5456fa16e507e7", + "text": "Inhibitor concentration (g)", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + 
"record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "Inhibitor concentration (g)", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "3acf3c88a28cad76984ac041a8f5984c", + "text": "bc (V/dec)", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "bc (V/dec)", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "da72962f658cee29281fa0e11a548813", + "text": "ba (V/dec)", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "ba (V/dec)", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "63a8b6b360c7a61ef88ad6c0b3d6581d", + "text": "Ecorr (V)", "metadata": { + "languages": [ + "eng" + ], + 
"page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "Ecorr (V)", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "616ac8133f9b985812240add98badf5a", + "text": "icorr (A/cm2)", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "icorr (A/cm2)", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "5ef6c0b5c5c72f20a694c6bce97ed131", + "text": "Polarization resistance (\u03a9)", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "Polarization resistance (Ω)", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": 
"6eff2d13b846a74ce08e348c7151dd1c", + "text": "Corrosion rate (mm/year)", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "Corrosion rate (mm/year)", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "4a00cd3d6d5f9b71b105586a17125069", + "text": "0 2 4 6 8 10", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "0 2 4 6 8 10", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "812204070320132126dcfec00abb07f7", + "text": "0.0335 1.9460 0.0163 0.3233 0.1240 0.0382", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": 
"application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "0.0335 1.9460 0.0163 0.3233 0.1240 0.0382", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "08c96eb52fe4877d6a26d862f8919d35", + "text": "0.0409 0.0596 0.2369 0.0540 0.0556 0.0086", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "0.0409 0.0596 0.2369 0.0540 0.0556 0.0086", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "6fcf2a276d4b2d81f991b4eb6f04009a", + "text": "(cid:3) 0.9393 (cid:3) 0.8276 (cid:3) 0.8825 (cid:3) 0.8027 (cid:3) 0.5896 (cid:3) 0.5356", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "(cid:3) 0.9393 (cid:3) 0.8276 (cid:3) 0.8825 (cid:3) 0.8027 (cid:3) 0.5896 (cid:3) 0.5356", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "a725c31d8b684d978174d4dc11d29106", + "text": "0.0003 0.0002 0.0001 5.39E-05 5.46E-05 1.24E-05", "metadata": { + "languages": [ + "eng" + ], + 
"page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "0.0003 0.0002 0.0001 5.39E-05 5.46E-05 1.24E-05", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "f66516a9a89cb0ab07ccf9e15086f394", + "text": "24.0910 121.440 42.121 373.180 305.650 246.080", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "24.0910 121.440 42.121 373.180 305.650 246.080", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "a6663f53eba15d4c5596b1f8ec4208fd", + "text": "2.8163 1.5054 0.9476 0.4318 0.3772 0.0919", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": 
[ - "eng" - ], - "page_number": 4 - }, - "text": "2.8163 1.5054 0.9476 0.4318 0.3772 0.0919", - "type": "UncategorizedText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "f5db77e611b74b7298f1b48a82ffc7be", + "text": "The plot of inhibitor concentration over degree of surface coverage versus inhibitor concentration gives a straight line as shown in Fig. 5. The strong correlation reveals that egg shell adsorption on stainless surface in 0.5 M H2SO4 follow Langmuir adsorption isotherm. Figs. 6\u20138 show the SEM/EDX surface morphology analysis of stainless steel. Figs. 7 and 8 are the SEM/EDX images of the stainless steel specimens without and with inhibitor after weight loss experiment in sulphuric acid medium. The stainless steel surface corrosion product layer in the absence of inhibitor was porous and as a result gives no corrosion protection. With the presence of ES, corrosion damage was minimized, with an evidence of ES present on the metal surface as shown in Fig. 8.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "The plot of inhibitor concentration over degree of surface coverage versus inhibitor concentration gives a straight line as shown in Fig. 5. The strong correlation reveals that egg shell adsorption on stainless surface in 0.5 M H2SO4 follow Langmuir adsorption isotherm. Figs. 6–8 show the SEM/EDX surface morphology analysis of stainless steel. Figs. 
7 and 8 are the SEM/EDX images of the stainless steel specimens without and with inhibitor after weight loss experiment in sulphuric acid medium. The stainless steel surface corrosion product layer in the absence of inhibitor was porous and as a result gives no corrosion protection. With the presence of ES, corrosion damage was minimized, with an evidence of ES present on the metal surface as shown in Fig. 8.", - "type": "NarrativeText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "e4e5f97ab5b56767ed489d7cd3ee04f7", + "text": "12", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "12", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "afc0a737ef1e5ffa9d6b72bb32fef683", + "text": "C/0", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "C/0", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "d9a38658d857c1141618ad9115dc48b4", + "text": "10", "metadata": { + "languages": [ + "eng" + ], + "page_number": 
4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "10", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "2d046240fd1a0ff3420926f0a54e0aaa", + "text": "8", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "8", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "4c136188f1e2e974ec1003968916824a", + "text": "0 / C", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "0 / C", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "594366da1ff6e7a343ec1666c5852389", 
+ "text": "6", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "6", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "d84c13ba166bd29d042db10acba6d243", + "text": "4", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "4", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "d4210b5ce6f99e242d8c1aa586691286", + "text": "2", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "2", - "type": "UncategorizedText" + ] + } + } }, { + "type": 
"UncategorizedText", "element_id": "7afb08e1cc308afebdc038fc7e4595ed", + "text": "2", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "2", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "696d24804069bc593dc624bf7ba904e2", + "text": "4", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "4", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "ef054383c29789c2743d93a6189f7f47", + "text": "6", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - 
"text": "6", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "ae2f6fc244a6aa053403e38912fdc56a", + "text": "8", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "8", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "33c153482d9c925a35781bd5c9697648", + "text": "10", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "10", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "8f325f6eb1678922e83e32746b981b80", + "text": "Concentration (g)", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": 
"application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "Concentration (g)", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "9d46c2166a49c9e3a75ed98cb20ce13f", + "text": "Fig. 5. Langmuir adsorption isotherm of ES.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "Fig. 5. Langmuir adsorption isotherm of ES.", - "type": "Title" + ] + } + } }, { + "type": "Header", "element_id": "9d639b03d26ec1872a4e91ac99031fdf", + "text": "O. Sanni, A.P.I. Popoola / Data in Brief 22 (2019) 451\u2013457", "metadata": { + "languages": [ + "eng" + ], + "page_number": 5, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "O. Sanni, A.P.I. Popoola / Data in Brief 22 (2019) 451–457", - "type": "Header" + ] + } + } }, { + "type": "Title", "element_id": "cfea47dcbf32f3d8597e777afa74d20e", + "text": "Fig. 6. 
SEM/EDX image of as-received stainless steel.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 5, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "Fig. 6. SEM/EDX image of as-received stainless steel.", - "type": "Title" + ] + } + } }, { + "type": "NarrativeText", "element_id": "a1e6c9bab7935444a7491a47091be10c", + "text": "Fig. 7. SEM/EDX image of stainless steel immersed in 0.5 M H2SO4 solution without inhibitor.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 5, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "Fig. 7. SEM/EDX image of stainless steel immersed in 0.5 M H2SO4 solution without inhibitor.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "49e093091da774c567151e5147c70027", + "text": "Fig. 8. 
SEM/EDX image of stainless steel immersed in 0.5 M H2SO4 solution with the presence of inhibitor.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 5, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "Fig. 8. SEM/EDX image of stainless steel immersed in 0.5 M H2SO4 solution with the presence of inhibitor.", - "type": "NarrativeText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "8ac2e9f97dc89f9d9bac5baec281f7f2", + "text": "455", "metadata": { + "languages": [ + "eng" + ], + "page_number": 5, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "455", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "e303e27893be099ef5fd03235efee7fe", + "text": "456", "metadata": { + "languages": [ + "eng" + ], + "page_number": 6, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 6 - }, - "text": "456", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Header", "element_id": "91c8bf5283b45a71164a103f496f93c1", + "text": "O. Sanni, A.P.I. Popoola / Data in Brief 22 (2019) 451\u2013457", "metadata": { + "languages": [ + "eng" + ], + "page_number": 6, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 6 - }, - "text": "O. Sanni, A.P.I. Popoola / Data in Brief 22 (2019) 451–457", - "type": "Header" + ] + } + } }, { + "type": "ListItem", "element_id": "bffefa92b06bc6009f81965d3dadc0ce", + "text": "2. Experimental design, materials and methods", "metadata": { + "languages": [ + "eng" + ], + "page_number": 6, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 6 - }, - "text": "2. Experimental design, materials and methods", - "type": "ListItem" + ] + } + } }, { + "type": "Title", "element_id": "484707d26d81d85df99f322c1bbb8ca3", + "text": "2.1. 
Material", "metadata": { + "languages": [ + "eng" + ], + "page_number": 6, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 6 - }, - "text": "2.1. Material", - "type": "Title" + ] + } + } }, { + "type": "NarrativeText", "element_id": "79d10fe9600d8d3428b5df86faa7c099", + "text": "Austenitic stainless steel Type 316 was used in this study with chemical composition reported in [1,2]. The chemicals used were of annular grade. The inhibitor concentrations are in the range of 2, 4, 6, 8 and 10 g [3\u20135]. The structural formula of egg shell powder is shown in Fig. 9.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 6, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 6 - }, - "text": "Austenitic stainless steel Type 316 was used in this study with chemical composition reported in [1,2]. The chemicals used were of annular grade. The inhibitor concentrations are in the range of 2, 4, 6, 8 and 10 g [3–5]. The structural formula of egg shell powder is shown in Fig. 9.", - "type": "NarrativeText" + ] + } + } }, { + "type": "Title", "element_id": "b6bd160c80816ff7b2d8a36ccfc67568", + "text": "Fig. 9. 
Chemical structure of egg shell powder.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 6, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 6 - }, - "text": "Fig. 9. Chemical structure of egg shell powder.", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "aeafe864b565b167f053a348390b3eff", + "text": "2.2. Weight loss method", "metadata": { + "languages": [ + "eng" + ], + "page_number": 6, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 6 - }, - "text": "2.2. Weight loss method", - "type": "Title" + ] + } + } }, { + "type": "NarrativeText", "element_id": "0e51f945cacb5ec184a3613487b6fefb", + "text": "This physical measurement was carried out in order to provide direct result on how the corrosive environment affects the test sample. The cleaned and weighed specimen was suspended in beakers with the aid of glass hooks and rods with the test solution of ES at different concentration (2, 4, 6, 8 and 10 g). The pre-weighed specimen was retrieved from the test solution after every 24 h, cleaned, dried and reweighed. 
The difference between the weight at a given time and the initial weight of the specimen was taken as the weight loss which was used to calculate corrosion rate and inhibition ef\ufb01ciency.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 6, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 6 - }, - "text": "This physical measurement was carried out in order to provide direct result on how the corrosive environment affects the test sample. The cleaned and weighed specimen was suspended in beakers with the aid of glass hooks and rods with the test solution of ES at different concentration (2, 4, 6, 8 and 10 g). The pre-weighed specimen was retrieved from the test solution after every 24 h, cleaned, dried and reweighed. The difference between the weight at a given time and the initial weight of the specimen was taken as the weight loss which was used to calculate corrosion rate and inhibition efficiency.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "fed48b9de93d4324223aa5fbdfe2f359", + "text": "The corrosion rate (CR) was calculated using Eq. 
(1) [1\u20135]", "metadata": { + "languages": [ + "eng" + ], + "page_number": 6, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 6 - }, - "text": "The corrosion rate (CR) was calculated using Eq. (1) [1–5]", - "type": "NarrativeText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "2c4a913c3a4b8bccd9c7003f25ae25af", + "text": "(cid:1) \u00de \u00bc 87:6W DAT", "metadata": { + "languages": [ + "eng" + ], + "page_number": 6, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 6 - }, - "text": "(cid:1) Þ ¼ 87:6W DAT", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "902d0aabf523c467c200f5203957e606", + "text": "(cid:3)", "metadata": { + "languages": [ + "eng" + ], + "page_number": 6, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - 
"eng" - ], - "page_number": 6 - }, - "text": "(cid:3)", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "44d54b6fb44ac7afc9f40a0e7a5fcde3", + "text": "Corrosion rate CR\u00f0", "metadata": { + "languages": [ + "eng" + ], + "page_number": 6, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 6 - }, - "text": "Corrosion rate CRð", - "type": "Title" + ] + } + } }, { + "type": "NarrativeText", "element_id": "7459b20ea68d65b7a967500f22223507", + "text": "where: W is weight loss in mg, A is specimen surface area, T is immersion period in hours and D is the specimen density. From the corrosion rate, the surface coverage (\u03b8) and inhibition ef\ufb01ciencies (IE %) were determined using Eqs. (2) and (3) respectively", "metadata": { + "languages": [ + "eng" + ], + "page_number": 6, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 6 - }, - "text": "where: W is weight loss in mg, A is specimen surface area, T is immersion period in hours and D is the specimen density. From the corrosion rate, the surface coverage (θ) and inhibition efficiencies (IE %) were determined using Eqs. 
(2) and (3) respectively", - "type": "NarrativeText" + ] + } + } }, { + "type": "Title", "element_id": "d269706e81c2b5978ae0b5c820ce176a", + "text": "\u03b8 \u00bc CRo (cid:3) CR", "metadata": { + "languages": [ + "eng" + ], + "page_number": 6, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 6 - }, - "text": "θ ¼ CRo (cid:3) CR", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "b2cc1eda5ffbccf6416235c44181538c", + "text": "CRo", "metadata": { + "languages": [ + "eng" + ], + "page_number": 6, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 6 - }, - "text": "CRo", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "d48a9ee64508de2e63b2f4579ef78432", + "text": "IE \u00f0%\u00de \u00bc CRo (cid:3) CR", "metadata": { + "languages": [ + "eng" + ], + "page_number": 6, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 6 - }, - "text": "IE ð%Þ ¼ CRo (cid:3) CR", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "3bf244c1b2eb32875b292a28c130aba4", + "text": "CRo", "metadata": { + "languages": [ + "eng" + ], + "page_number": 6, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 6 - }, - "text": "CRo", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "2c6d5581a35c83236153f78c5b53cb60", + "text": "x", "metadata": { + "languages": [ + "eng" + ], + "page_number": 6, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 6 - }, - "text": "x", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "ca4aeca8c2a7e6b9df923db4a5902289", + "text": "100 1", "metadata": { + "languages": [ + "eng" + ], + "page_number": 6, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, 
"permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 6 - }, - "text": "100 1", - "type": "UncategorizedText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "a47048cff18528a9a4838728a55e526a", + "text": "where: CRo and CR are the corrosion rate in absence and presence of inhibitor respectively.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 6, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 6 - }, - "text": "where: CRo and CR are the corrosion rate in absence and presence of inhibitor respectively.", - "type": "NarrativeText" + } + ] + } + } }, { + "type": "Title", "element_id": "6aabbfd8e92223470a6c9184a84857c0", + "text": "2.3. Potentiodynamic polarization method", "metadata": { + "languages": [ + "eng" + ], + "page_number": 6, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 6 - }, - "text": "2.3. 
Potentiodynamic polarization method", - "type": "Title" + ] + } + } }, { + "type": "NarrativeText", "element_id": "c653c9cca5ebdd3089b705f279316500", + "text": "The potentiodynamic polarization method was performed on the prepared test samples immersed in 0.5 M H2SO4 solution in the presence and absence of different ES concentrations. A three electrode system was used; stainless steel Type 316 plate as working electrode with an exposed area of 1.0 cm2, platinum rod as counter electrode and silver chloride electrode as reference electrode. The electrode was polished, degreased in acetone and thoroughly rinsed with distilled water before the experiment. Current density against applied potential was plotted. The slope of the linear part in anodic and cathodic plots gives anodic and cathodic constants according to the Stern\u2013Geary equation, and the", "metadata": { + "languages": [ + "eng" + ], + "page_number": 6, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 6 - }, - "text": "The potentiodynamic polarization method was performed on the prepared test samples immersed in 0.5 M H2SO4 solution in the presence and absence of different ES concentrations. A three electrode system was used; stainless steel Type 316 plate as working electrode with an exposed area of 1.0 cm2, platinum rod as counter electrode and silver chloride electrode as reference electrode. The electrode was polished, degreased in acetone and thoroughly rinsed with distilled water before the experiment. Current density against applied potential was plotted. 
The slope of the linear part in anodic and cathodic plots gives anodic and cathodic constants according to the Stern–Geary equation, and the", - "type": "NarrativeText" + ] + } + } }, { + "type": "Title", "element_id": "b1cdefa47658616bf79766f8fc353f7c", + "text": "\u00f01\u00de", "metadata": { + "languages": [ + "eng" + ], + "page_number": 6, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 6 - }, - "text": "ð1Þ", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "a1a035eeaa7c25a2b543757f4cc7d0fb", + "text": "\u00f02\u00de", "metadata": { + "languages": [ + "eng" + ], + "page_number": 6, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 6 - }, - "text": "ð2Þ", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "74d17735c911d69b6d10e05d0c9d79d6", + "text": "\u00f03\u00de", "metadata": { + "languages": [ + "eng" + ], + "page_number": 6, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 6 - }, - "text": "ð3Þ", - "type": "Title" + ] + } + } }, { + "type": "Header", "element_id": "e40c3ee561b10ca5b7a76900c8d5b263", + "text": "O. Sanni, A.P.I. Popoola / Data in Brief 22 (2019) 451\u2013457", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "O. Sanni, A.P.I. Popoola / Data in Brief 22 (2019) 451–457", - "type": "Header" + ] + } + } }, { + "type": "NarrativeText", "element_id": "a8d445f830ed31990875a519f4be0eb5", + "text": "steps of the linear polarization plot are substituted to get corrosion current. Nova software was used with linear polarization resistance (LPR) and the current was set to 10 mA (maximum) and 10 nA (minimum). 
LSV staircase parameter start potential (cid:3) 1.5 v, step potential 0.001 m/s and stop potential of \u00fe1.5 v set was used in this study.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "steps of the linear polarization plot are substituted to get corrosion current. Nova software was used with linear polarization resistance (LPR) and the current was set to 10 mA (maximum) and 10 nA (minimum). LSV staircase parameter start potential (cid:3) 1.5 v, step potential 0.001 m/s and stop potential of þ1.5 v set was used in this study.", - "type": "NarrativeText" + ] + } + } }, { + "type": "Title", "element_id": "2461424bae61c8cfad1cd33a949843f0", + "text": "Acknowledgements", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "Acknowledgements", - "type": "Title" + ] + } + } }, { + "type": "NarrativeText", "element_id": "2d8a74bbba4ad3bb13afc8a98daec91d", + "text": "This work was supported by the National Research Foundation of South Africa and the Tshwane", "metadata": { + "languages": [ + "eng" + ], + 
"page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "This work was supported by the National Research Foundation of South Africa and the Tshwane", - "type": "NarrativeText" + ] + } + } }, { + "type": "Title", "element_id": "154e2a7bdebd1347eccb08f349284130", + "text": "University of Technology Pretoria South Africa.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "University of Technology Pretoria South Africa.", - "type": "Title" + ] + } + } }, { + "type": "NarrativeText", "element_id": "41a46b0a6852a31b1e51cf65a4ecf87d", + "text": "Transparency document. 
Supporting information", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "Transparency document. Supporting information", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "c5635281e7e879dd338b99ae84f94056", + "text": "Transparency document associated with this article can be found in the online version at https://doi.", "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", "languages": [ "eng" ], "links": [ { - "start_index": 94, "text": ":// doi", - "url": "https://doi.org/10.1016/j.dib.2018.11.134" + "url": "https://doi.org/10.1016/j.dib.2018.11.134", + "start_index": 94 } ], - "page_number": 7 - }, - "text": "Transparency document associated with this article can be found in the online version at https://doi.", - "type": "NarrativeText" - }, - { - "element_id": "ee62928948d5d7b5e13edf65d917dc63", - "metadata": { + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", + ] + } + } + 
}, + { + "type": "UncategorizedText", + "element_id": "ee62928948d5d7b5e13edf65d917dc63", + "text": "org/10.1016/j.dib.2018.11.134.", + "metadata": { "languages": [ "eng" ], "links": [ { - "start_index": 0, "text": "org / 10 . 1016 / j . dib . 2018 . 11 . 134", - "url": "https://doi.org/10.1016/j.dib.2018.11.134" + "url": "https://doi.org/10.1016/j.dib.2018.11.134", + "start_index": 0 } ], - "page_number": 7 - }, - "text": "org/10.1016/j.dib.2018.11.134.", - "type": "UncategorizedText" - }, - { - "element_id": "dbe83d8d2b6784a17d8faae3633b97f9", - "metadata": { + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "References", - "type": "Title" + ] + } + } }, { - "element_id": "d08513d888e4133fda75841dd05273d9", + "type": "Title", + "element_id": "dbe83d8d2b6784a17d8faae3633b97f9", + "text": "References", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "d08513d888e4133fda75841dd05273d9", + "text": "[1] O. Sanni, A.P.I. Popoola, O.S.I. 
Fayomi, Enhanced corrosion resistance of stainless steel type 316 in sulphuric acid solution", + "metadata": { "languages": [ "eng" ], "links": [ { - "start_index": 4, "text": "O . Sanni , A . P . I . Popoola , O . S . I . Fayomi ,", - "url": "http://refhub.elsevier.com/S2352-3409(18)31527-0/sbref1" + "url": "http://refhub.elsevier.com/S2352-3409(18)31527-0/sbref1", + "start_index": 4 } ], - "page_number": 7 - }, - "text": "[1] O. Sanni, A.P.I. Popoola, O.S.I. Fayomi, Enhanced corrosion resistance of stainless steel type 316 in sulphuric acid solution", - "type": "NarrativeText" - }, - { - "element_id": "29736d79aeb1e5fc195876dbf12f1c57", - "metadata": { + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "29736d79aeb1e5fc195876dbf12f1c57", + "text": "using eco-friendly waste product, Results Phys. 9 (2018) 225\u2013230.", + "metadata": { "languages": [ "eng" ], "links": [ { - "start_index": 0, - "text": "usingeco - friendlywasteproduct , ResultsPhys . 9 ( 2018 ) 225 –", - "url": "http://refhub.elsevier.com/S2352-3409(18)31527-0/sbref1" + "text": "usingeco - friendlywasteproduct , ResultsPhys . 9 ( 2018 ) 225 \u2013", + "url": "http://refhub.elsevier.com/S2352-3409(18)31527-0/sbref1", + "start_index": 0 }, { - "start_index": 0, - "text": "usingeco - friendlywasteproduct , ResultsPhys . 9 ( 2018 ) 225 – 230", - "url": "http://refhub.elsevier.com/S2352-3409(18)31527-0/sbref1" + "text": "usingeco - friendlywasteproduct , ResultsPhys . 
9 ( 2018 ) 225 \u2013 230", + "url": "http://refhub.elsevier.com/S2352-3409(18)31527-0/sbref1", + "start_index": 0 } ], - "page_number": 7 - }, - "text": "using eco-friendly waste product, Results Phys. 9 (2018) 225–230.", - "type": "NarrativeText" - }, - { - "element_id": "ca40f2c0d5a95e8cddab1c3b76f95e9e", - "metadata": { + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "ca40f2c0d5a95e8cddab1c3b76f95e9e", + "text": "[2] O. Sanni, A.P.I. Popoola, A. Kolesnikov, Constitutive modeling for prediction of optimal process parameters in corrosion", + "metadata": { "languages": [ "eng" ], "links": [ { - "start_index": 4, "text": "O . Sanni , A . P . I . Popoola , A . Kolesnikov ,", - "url": "http://refhub.elsevier.com/S2352-3409(18)31527-0/sbref2" + "url": "http://refhub.elsevier.com/S2352-3409(18)31527-0/sbref2", + "start_index": 4 } ], - "page_number": 7 - }, - "text": "[2] O. Sanni, A.P.I. Popoola, A. 
Kolesnikov, Constitutive modeling for prediction of optimal process parameters in corrosion", - "type": "NarrativeText" - }, - { - "element_id": "e42cb45853ffd3e2c81095a126918c6c", - "metadata": { + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "e42cb45853ffd3e2c81095a126918c6c", + "text": "inhibition of austenitic stainless steel (Type 316)/acidic medium, Mater. Res. Express. 5 (10) (2018) 1\u201315.", + "metadata": { "languages": [ "eng" ], "links": [ { - "start_index": 0, - "text": "inhibitionofausteniticstainlesssteel ( Type316 )/ acidicmedium , Mater . Res . Express . 5 ( 10 )( 2018 ) 1 –", - "url": "http://refhub.elsevier.com/S2352-3409(18)31527-0/sbref2" + "text": "inhibitionofausteniticstainlesssteel ( Type316 )/ acidicmedium , Mater . Res . Express . 5 ( 10 )( 2018 ) 1 \u2013", + "url": "http://refhub.elsevier.com/S2352-3409(18)31527-0/sbref2", + "start_index": 0 }, { - "start_index": 0, - "text": "inhibitionofausteniticstainlesssteel ( Type316 )/ acidicmedium , Mater . Res . Express . 5 ( 10 )( 2018 ) 1 – 15", - "url": "http://refhub.elsevier.com/S2352-3409(18)31527-0/sbref2" + "text": "inhibitionofausteniticstainlesssteel ( Type316 )/ acidicmedium , Mater . Res . Express . 5 ( 10 )( 2018 ) 1 \u2013 15", + "url": "http://refhub.elsevier.com/S2352-3409(18)31527-0/sbref2", + "start_index": 0 } ], - "page_number": 7 - }, - "text": "inhibition of austenitic stainless steel (Type 316)/acidic medium, Mater. Res. Express. 
5 (10) (2018) 1–15.", - "type": "NarrativeText" - }, - { - "element_id": "610ae41b07604b353631457b9a4ad632", - "metadata": { + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "610ae41b07604b353631457b9a4ad632", + "text": "[3] O. Sanni, A.P.I. Popoola, O.S.I. Fayomi, The inhibitive study of egg shell powder on UNS N08904 austenitic stainless steel", + "metadata": { "languages": [ "eng" ], "links": [ { - "start_index": 4, "text": "O . Sanni , A . P . I . Popoola , O . S . I . Fayomi ,", - "url": "http://refhub.elsevier.com/S2352-3409(18)31527-0/sbref3" + "url": "http://refhub.elsevier.com/S2352-3409(18)31527-0/sbref3", + "start_index": 4 } ], - "page_number": 7 - }, - "text": "[3] O. Sanni, A.P.I. Popoola, O.S.I. Fayomi, The inhibitive study of egg shell powder on UNS N08904 austenitic stainless steel", - "type": "NarrativeText" - }, - { - "element_id": "ae14702f67ee1c5d2e5316e8344a6971", - "metadata": { + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "ae14702f67ee1c5d2e5316e8344a6971", + "text": "corrosion in chloride solution, Def. Technol. 
14 (2018) 463\u2013468.", + "metadata": { "languages": [ "eng" ], "links": [ { - "start_index": 0, - "text": "corrosioninchloridesolution , Def . Technol . 14 ( 2018 ) 463 –", - "url": "http://refhub.elsevier.com/S2352-3409(18)31527-0/sbref3" + "text": "corrosioninchloridesolution , Def . Technol . 14 ( 2018 ) 463 \u2013", + "url": "http://refhub.elsevier.com/S2352-3409(18)31527-0/sbref3", + "start_index": 0 }, { - "start_index": 0, - "text": "corrosioninchloridesolution , Def . Technol . 14 ( 2018 ) 463 – 468", - "url": "http://refhub.elsevier.com/S2352-3409(18)31527-0/sbref3" + "text": "corrosioninchloridesolution , Def . Technol . 14 ( 2018 ) 463 \u2013 468", + "url": "http://refhub.elsevier.com/S2352-3409(18)31527-0/sbref3", + "start_index": 0 } ], - "page_number": 7 - }, - "text": "corrosion in chloride solution, Def. Technol. 14 (2018) 463–468.", - "type": "NarrativeText" - }, - { - "element_id": "d1c8e3e15192f1bdcda9cf8e38a5573f", - "metadata": { + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "d1c8e3e15192f1bdcda9cf8e38a5573f", + "text": "[4] O. Sanni, A.P.I. Popoola, O.S.I. Fayomi, C.A. Loto, A comparative study of inhibitive effect of waste product on stainless steel corrosion in sodium chloride/sulfuric acid environments, Metallogr. Microstruct. Anal. (2018) 1\u201317. 
https://doi.org/10.1007/ s13632-018-0495-5.", + "metadata": { "languages": [ "eng" ], "links": [ { - "start_index": 233, "text": "https", - "url": "https://doi.org/10.1007/s13632-018-0495-5" + "url": "https://doi.org/10.1007/s13632-018-0495-5", + "start_index": 233 }, { - "start_index": 233, "text": "https", - "url": "https://doi.org/10.1007/s13632-018-0495-5" + "url": "https://doi.org/10.1007/s13632-018-0495-5", + "start_index": 233 }, { - "start_index": 233, "text": "https :// doi . org / 10 . 1007", - "url": "https://doi.org/10.1007/s13632-018-0495-5" + "url": "https://doi.org/10.1007/s13632-018-0495-5", + "start_index": 233 }, { - "start_index": 258, "text": "s13632 - 018 - 0495 - 5", - "url": "https://doi.org/10.1007/s13632-018-0495-5" + "url": "https://doi.org/10.1007/s13632-018-0495-5", + "start_index": 258 } ], - "page_number": 7 - }, - "text": "[4] O. Sanni, A.P.I. Popoola, O.S.I. Fayomi, C.A. Loto, A comparative study of inhibitive effect of waste product on stainless steel corrosion in sodium chloride/sulfuric acid environments, Metallogr. Microstruct. Anal. (2018) 1–17. https://doi.org/10.1007/ s13632-018-0495-5.", - "type": "NarrativeText" - }, - { - "element_id": "3827d49ec98a215986f78d1df2ae2d33", - "metadata": { + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "3827d49ec98a215986f78d1df2ae2d33", + "text": "[5] O. Sanni, A.P.I. Popoola, O.S.I. Fayomi, Inhibition of engineering material in sulphuric acid solution using waste product, Contributed Papers from Materials Science and Technology (MS&T18), 2018. 
\u2329https://doi.org/10.7449/2018/MST_2018_254_261\u232a.", + "metadata": { "languages": [ "eng" ], "links": [ { - "start_index": 202, "text": "https :// doi . org / 10 . 7449 / 2018 / MST _ 2018 _ 254 _ 261", - "url": "https://doi.org/10.7449/2018/MST_2018_254_261" + "url": "https://doi.org/10.7449/2018/MST_2018_254_261", + "start_index": 202 } ], - "page_number": 7 - }, - "text": "[5] O. Sanni, A.P.I. Popoola, O.S.I. Fayomi, Inhibition of engineering material in sulphuric acid solution using waste product, Contributed Papers from Materials Science and Technology (MS&T18), 2018. 〈https://doi.org/10.7449/2018/MST_2018_254_261〉.", - "type": "NarrativeText" + "page_number": 7, + "filetype": "application/pdf", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } }, { + "type": "UncategorizedText", "element_id": "7fbcd3b873966a649efd837300e0c576", + "text": "457", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/65/11/main.PMC6312790.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "457", - "type": "UncategorizedText" + ] + } + } } ] \ No newline at end of file diff --git a/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/biomed-api/75/29/main.PMC6312793.pdf.json b/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/biomed-api/75/29/main.PMC6312793.json similarity index 52% rename from 
test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/biomed-api/75/29/main.PMC6312793.pdf.json rename to test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/biomed-api/75/29/main.PMC6312793.json index 23af81b2e0..d649cb4330 100644 --- a/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/biomed-api/75/29/main.PMC6312793.pdf.json +++ b/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/biomed-api/75/29/main.PMC6312793.json @@ -1,2302 +1,2514 @@ [ { + "type": "Header", "element_id": "d25e5f46b5be5f4c8a6573d0688dae93", + "text": "Data in Brief 22 (2019) 484\u2013487", "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", "languages": [ "eng" ], "links": [ { - "start_index": 0, - "text": "DatainBrief22 ( 2019 ) 484 –", - "url": "https://doi.org/10.1016/j.dib.2018.12.055" + "text": "DatainBrief22 ( 2019 ) 484 \u2013", + "url": "https://doi.org/10.1016/j.dib.2018.12.055", + "start_index": 0 } ], - "page_number": 1 - }, - "text": "Data in Brief 22 (2019) 484–487", - "type": "Header" - }, - { - "element_id": "ffd4c08fe1f13ed4b1c1c523ead5510b", - "metadata": { + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "ffd4c08fe1f13ed4b1c1c523ead5510b", + "text": "Contents lists available at ScienceDirect", + "metadata": { "languages": [ "eng" ], "links": [ { - "start_index": 
-1, "text": "", - "url": "www.sciencedirect.com/science/journal/23523409" + "url": "www.sciencedirect.com/science/journal/23523409", + "start_index": -1 } ], - "page_number": 1 - }, - "text": "Contents lists available at ScienceDirect", - "type": "NarrativeText" - }, - { - "element_id": "ab45cdb29d177758321b79d0e5430958", - "metadata": { + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "Data in Brief", - "type": "Title" + ] + } + } }, { - "element_id": "b6ed6a9bb542e0891cebca3fa85e6bcd", + "type": "Title", + "element_id": "ab45cdb29d177758321b79d0e5430958", + "text": "Data in Brief", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", + ] + } + } + }, + { + "type": "Title", + "element_id": "b6ed6a9bb542e0891cebca3fa85e6bcd", + "text": "journal homepage: www.elsevier.com/locate/dib", + "metadata": { "languages": [ "eng" ], "links": [ { - "start_index": 18, "text": "www . elsevier . 
com / locate /", - "url": "www.elsevier.com/locate/dib" + "url": "www.elsevier.com/locate/dib", + "start_index": 18 } ], - "page_number": 1 - }, - "text": "journal homepage: www.elsevier.com/locate/dib", - "type": "Title" + "page_number": 1, + "filetype": "application/pdf", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } }, { + "type": "Title", "element_id": "1acc2228e407a58c34b39c30aed641fe", + "text": "Data Article", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "Data Article", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "798dd79fdd2f8266cf92f28200198e08", + "text": "A benchmark dataset for the multiple depot vehicle scheduling problem", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "A benchmark dataset for the multiple depot vehicle scheduling problem", - "type": "Title" + ] + } + } }, 
{ + "type": "UncategorizedText", "element_id": "8edd00e1188d7cb75051b1998ee494a9", + "text": "Sarang Kulkarni a,b,c,n, Mohan Krishnamoorthy d,e, Abhiram Ranade f, Andreas T. Ernst c, Rahul Patil b", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "Sarang Kulkarni a,b,c,n, Mohan Krishnamoorthy d,e, Abhiram Ranade f, Andreas T. Ernst c, Rahul Patil b", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "7d3eb41c30b752ac6026851e8119f642", + "text": "a IITB-Monash Research Academy, IIT Bombay, Powai, Mumbai 400076, India b SJM School of Management, IIT Bombay, Powai, Mumbai 400076, India c School of Mathematical Sciences, Monash University, Clayton, VIC 3800, Australia d Department of Mechanical and Aerospace Engineering, Monash University, Clayton, VIC 3800, Australia e School of Information Technology and Electrical Engineering, The University of Queensland, QLD 4072, Australia f Department of Computer Science and Engineering, IIT Bombay, Powai, Mumbai 400076, India", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - 
"languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "a IITB-Monash Research Academy, IIT Bombay, Powai, Mumbai 400076, India b SJM School of Management, IIT Bombay, Powai, Mumbai 400076, India c School of Mathematical Sciences, Monash University, Clayton, VIC 3800, Australia d Department of Mechanical and Aerospace Engineering, Monash University, Clayton, VIC 3800, Australia e School of Information Technology and Electrical Engineering, The University of Queensland, QLD 4072, Australia f Department of Computer Science and Engineering, IIT Bombay, Powai, Mumbai 400076, India", - "type": "UncategorizedText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "3f086bae7b6270727b6fca8ba4563fd7", + "text": "a r t i c l e i n f o", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "a r t i c l e i n f o", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "a951e8fba28630797a561ae24142f1b9", + "text": "a b s t r a c t", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - 
"text": "a b s t r a c t", - "type": "NarrativeText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "90549df65b3824f67f0290bc96644155", + "text": "Article history: Received 21 November 2018 Received in revised form 13 December 2018 Accepted 15 December 2018 Available online 18 December 2018", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "Article history: Received 21 November 2018 Received in revised form 13 December 2018 Accepted 15 December 2018 Available online 18 December 2018", - "type": "UncategorizedText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "3e158fd01d34697ac14890732b84a1fc", + "text": "This data article presents a description of a benchmark dataset for the multiple depot vehicle scheduling problem (MDVSP). The MDVSP is to assign vehicles from different depots to timetabled trips to minimize the total cost of empty travel and waiting. The dataset has been developed to evaluate the heuristics of the MDVSP that are presented in \u201cA new formulation and a column generation-based heuristic for the multiple depot vehicle sche- duling problem\u201d (Kulkarni et al., 2018). The dataset contains 60 problem instances of varying size. Researchers can use the dataset to evaluate the future algorithms for the MDVSP and compare the performance with the existing algorithms. 
The dataset includes a program that can be used to generate new problem instances of the MDVSP.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "This data article presents a description of a benchmark dataset for the multiple depot vehicle scheduling problem (MDVSP). The MDVSP is to assign vehicles from different depots to timetabled trips to minimize the total cost of empty travel and waiting. The dataset has been developed to evaluate the heuristics of the MDVSP that are presented in “A new formulation and a column generation-based heuristic for the multiple depot vehicle sche- duling problem” (Kulkarni et al., 2018). The dataset contains 60 problem instances of varying size. Researchers can use the dataset to evaluate the future algorithms for the MDVSP and compare the performance with the existing algorithms. The dataset includes a program that can be used to generate new problem instances of the MDVSP.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "298de5d25d4db319d8cb1c4da4e14411", + "text": "& 2018 Published by Elsevier Inc. 
This is an open access article under the CC BY-NC-ND license (http://creativecommons.org/licenses/by-nc-nd/4.0/).", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "& 2018 Published by Elsevier Inc. This is an open access article under the CC BY-NC-ND license (http://creativecommons.org/licenses/by-nc-nd/4.0/).", - "type": "NarrativeText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "25ce21c9671271c1639f549d88644f16", + "text": "DOI of original article: https://doi.org/10.1016/j.trb.2018.11.007 n Corresponding author at: IITB-Monash Research Academy, IIT Bombay, Powai, Mumbai 400076, India.", "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", "languages": [ "eng" ], "links": [ { - "start_index": 25, "text": "https :// doi . org / 10 . 1016 / j . trb . 2018 . 11 . nCorrespondingauthorat", - "url": "http://dx.doi.org/10.1016/j.trb.2018.11.007" + "url": "http://dx.doi.org/10.1016/j.trb.2018.11.007", + "start_index": 25 }, { - "start_index": 25, "text": "https", - "url": "http://dx.doi.org/10.1016/j.trb.2018.11.007" + "url": "http://dx.doi.org/10.1016/j.trb.2018.11.007", + "start_index": 25 }, { - "start_index": 25, "text": "https :// doi . org / 10 . 1016 / j . trb . 2018 . 
11 .", - "url": "http://dx.doi.org/10.1016/j.trb.2018.11.007" + "url": "http://dx.doi.org/10.1016/j.trb.2018.11.007", + "start_index": 25 } ], - "page_number": 1 - }, - "text": "DOI of original article: https://doi.org/10.1016/j.trb.2018.11.007 n Corresponding author at: IITB-Monash Research Academy, IIT Bombay, Powai, Mumbai 400076, India.", - "type": "UncategorizedText" - }, - { - "element_id": "b4b1b0bb1bf27aa4de6d404b9304fb02", - "metadata": { + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", + ] + } + } + }, + { + "type": "Title", + "element_id": "b4b1b0bb1bf27aa4de6d404b9304fb02", + "text": "E-mail address: sarangkulkarni@iitb.ac.in (S. Kulkarni).", + "metadata": { "languages": [ "eng" ], "links": [ { - "start_index": 16, "text": "sarangkulkarni @ iitb . ac . in", - "url": "mailto:sarangkulkarni@iitb.ac.in" + "url": "mailto:sarangkulkarni@iitb.ac.in", + "start_index": 16 } ], - "page_number": 1 - }, - "text": "E-mail address: sarangkulkarni@iitb.ac.in (S. 
Kulkarni).", - "type": "Title" - }, - { - "element_id": "3bf8a8c86295c8d68682ff1c4594b485", - "metadata": { + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "3bf8a8c86295c8d68682ff1c4594b485", + "text": "https://doi.org/10.1016/j.dib.2018.12.055 2352-3409/& 2018 Published by Elsevier Inc. This is an open access article under the CC BY-NC-ND license (http://creativecommons.org/licenses/by-nc-nd/4.0/).", + "metadata": { "languages": [ "eng" ], "links": [ { - "start_index": 0, "text": "https", - "url": "https://doi.org/10.1016/j.dib.2018.12.055" + "url": "https://doi.org/10.1016/j.dib.2018.12.055", + "start_index": 0 }, { - "start_index": 0, "text": "https", - "url": "https://doi.org/10.1016/j.dib.2018.12.055" + "url": "https://doi.org/10.1016/j.dib.2018.12.055", + "start_index": 0 }, { - "start_index": 0, "text": "https :// doi . org / 10 . 1016 / j . dib . 2018 . 12 .", - "url": "https://doi.org/10.1016/j.dib.2018.12.055" + "url": "https://doi.org/10.1016/j.dib.2018.12.055", + "start_index": 0 } ], - "page_number": 1 - }, - "text": "https://doi.org/10.1016/j.dib.2018.12.055 2352-3409/& 2018 Published by Elsevier Inc. 
This is an open access article under the CC BY-NC-ND license (http://creativecommons.org/licenses/by-nc-nd/4.0/).", - "type": "NarrativeText" + "page_number": 1, + "filetype": "application/pdf", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } }, { + "type": "Header", "element_id": "690f7bab68c635029827f497e6c2b218", + "text": "S. Kulkarni et al. / Data in Brief 22 (2019) 484\u2013487", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "S. Kulkarni et al. 
/ Data in Brief 22 (2019) 484–487", - "type": "Header" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "e93f43b23b30a616389e12f193fdf212", + "text": "485", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "485", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "8b5f19753e010793be1dd03a4efe1876", + "text": "Speci\ufb01cations table", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "Specifications table", - "type": "Title" + ] + } + } }, { + "type": "NarrativeText", "element_id": "b592fc872f2d852ad0242b2353e61673", + "text": "Subject area Operations research More speci\ufb01c subject area Vehicle scheduling Type of data How data were acquired", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - 
"url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "Subject area Operations research More specific subject area Vehicle scheduling Type of data How data were acquired", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "d21722fd648aed04c8119948bf24b400", + "text": "Tables, text \ufb01les Arti\ufb01cially generated by a C \u00fe \u00fe program on Intels Xeons CPU E5\u2013 2670 v2 with Linux operating system. Raw Sixty randomly generated instances of the MDVSP with the number of depots in (8, 12, 16) and the number of trips in (1500, 2000, 2500, 3000) Randomly generated instances IITB-Monash Research Academy, IIT Bombay, Powai, Mumbai, India. Data can be downloaded from https://orlib.uqcloud.net/ Kulkarni, S., Krishnamoorthy, M., Ranade, A., Ernst, A.T. and Patil, R., 2018. A new formulation and a column generation-based heuristic for the multiple depot vehicle scheduling problem. Transportation Research Part B: Methodological, 118, pp. 457\u2013487 [3].", "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", "languages": [ "eng" ], "links": [ { - "start_index": 444, "text": ".,", - "url": "https://orlib.uqcloud.net/" + "url": "https://orlib.uqcloud.net/", + "start_index": 444 } ], - "page_number": 2 - }, - "text": "Tables, text files Artificially generated by a C þ þ program on Intels Xeons CPU E5– 2670 v2 with Linux operating system. Raw Sixty randomly generated instances of the MDVSP with the number of depots in (8, 12, 16) and the number of trips in (1500, 2000, 2500, 3000) Randomly generated instances IITB-Monash Research Academy, IIT Bombay, Powai, Mumbai, India. 
Data can be downloaded from https://orlib.uqcloud.net/ Kulkarni, S., Krishnamoorthy, M., Ranade, A., Ernst, A.T. and Patil, R., 2018. A new formulation and a column generation-based heuristic for the multiple depot vehicle scheduling problem. Transportation Research Part B: Methodological, 118, pp. 457–487 [3].", - "type": "NarrativeText" - }, - { - "element_id": "156810b54dfdfa06606b2ab9c20e5936", - "metadata": { + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "Data format Experimental factors", - "type": "Title" + ] + } + } }, { - "element_id": "f10143ddfaeadcb83593edbd06f6dae5", + "type": "Title", + "element_id": "156810b54dfdfa06606b2ab9c20e5936", + "text": "Data format Experimental factors", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "Experimental features Data source location Data accessibility Related research article", - "type": "Title" + ] + } + } }, { - "element_id": "61e613d4cdb2f24fcb40060db45431c0", + "type": "Title", + "element_id": "f10143ddfaeadcb83593edbd06f6dae5", + "text": "Experimental features Data source location 
Data accessibility Related research article", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", + ] + } + } + }, + { + "type": "Title", + "element_id": "61e613d4cdb2f24fcb40060db45431c0", + "text": "Value of the data", + "metadata": { "languages": [ "eng" ], - "page_number": 2 - }, - "text": "Value of the data", - "type": "Title" + "page_number": 2, + "filetype": "application/pdf", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } }, { + "type": "NarrativeText", "element_id": "d0dfba5954b055b335476e9249b9a73c", + "text": "(cid:2) The dataset contains 60 different problem instances of the MDVSP that can be used to evaluate the", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "(cid:2) The dataset contains 60 different problem instances of the MDVSP that can be used to evaluate the", - "type": "NarrativeText" + ] + } + } }, { + "type": "Title", "element_id": 
"2956461e611848aeaccd16b99fc03400", + "text": "performance of the algorithms for the MDVSP.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "performance of the algorithms for the MDVSP.", - "type": "Title" + ] + } + } }, { + "type": "NarrativeText", "element_id": "2f732a3a72336ba52b0b0de6d0008640", + "text": "(cid:2) The data provide all the information that is required to model the MDVSP by using the existing", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "(cid:2) The data provide all the information that is required to model the MDVSP by using the existing", - "type": "NarrativeText" + ] + } + } }, { + "type": "Title", "element_id": "5bd31208ba63e7a44aeea1fd4d721d54", + "text": "mathematical formulations.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ 
{ "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "mathematical formulations.", - "type": "Title" + ] + } + } }, { + "type": "NarrativeText", "element_id": "038f53e4bdc8c6ea7b1c63f1b9a73e2f", + "text": "(cid:2) All the problem instances are available for use without any restrictions. (cid:2) The benchmark solutions and solution time for the problem instances are presented in [3] and can", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "(cid:2) All the problem instances are available for use without any restrictions. 
(cid:2) The benchmark solutions and solution time for the problem instances are presented in [3] and can", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "15906f62459fa76ddadb7a7ef1ce556b", + "text": "be used for the comparison.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "be used for the comparison.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "4a39c62bb4f7476ec42fd81325ea6f19", + "text": "(cid:2) The dataset includes a program that can generate similar problem instances of different sizes.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "(cid:2) The dataset includes a program that can generate similar problem instances of different sizes.", - "type": "NarrativeText" + ] + } + } }, { + "type": "ListItem", "element_id": "414bd3131cd65d5c68e1c7a140297506", + "text": "1. 
Data", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "1. Data", - "type": "ListItem" + ] + } + } }, { + "type": "NarrativeText", "element_id": "96589dd8025c674caf26c856ea689d4e", + "text": "The dataset contains 60 different problem instances of the multiple depot vehicle scheduling pro- blem (MDVSP). Each problem instance is provided in a separate \ufb01le. Each \ufb01le is named as \u2018RN-m-n-k.dat\u2019, where \u2018m\u2019, \u2018n\u2019, and \u2018k\u2019 denote the number of depots, the number of trips, and the instance number \u2018RN-8\u20131500-01.dat\u2019, for is the \ufb01rst problem instance with 8 depots and 1500 trips. For the number of depots, m, we used three values, 8, 12, and 16. The four values for the number of trips, n, are 1500, 2000, 2500, and 3000. For each size, \u00f0m; n\u00de, \ufb01ve instances are provided. The dataset can be downloaded from https://orlib.uqcloud.net.", "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", "languages": [ "eng" ], "links": [ { - "start_index": 509, - "text": "2500 , and3000 . size , ðm ; nÞ , fiveinstancesareprovided . Thedatasetcanbedownloadedfromhttps :// orlib . uqcloud . net", - "url": "https://orlib.uqcloud.net" + "text": "2500 , and3000 . size , \u00f0m ; n\u00de , \ufb01veinstancesareprovided . 
Thedatasetcanbedownloadedfromhttps :// orlib . uqcloud . net", + "url": "https://orlib.uqcloud.net", + "start_index": 509 } ], - "page_number": 2 - }, - "text": "The dataset contains 60 different problem instances of the multiple depot vehicle scheduling pro- blem (MDVSP). Each problem instance is provided in a separate file. Each file is named as ‘RN-m-n-k.dat’, where ‘m’, ‘n’, and ‘k’ denote the number of depots, the number of trips, and the instance number ‘RN-8–1500-01.dat’, for is the first problem instance with 8 depots and 1500 trips. For the number of depots, m, we used three values, 8, 12, and 16. The four values for the number of trips, n, are 1500, 2000, 2500, and 3000. For each size, ðm; nÞ, five instances are provided. The dataset can be downloaded from https://orlib.uqcloud.net.", - "type": "NarrativeText" + "page_number": 2, + "filetype": "application/pdf", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } }, { + "type": "UncategorizedText", "element_id": "97686fd4b810190336f3a3f4debb4c5d", + "text": "\u2018\u00f0m; n\u00de\u2019,", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "‘ðm; nÞ’,", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "a1d0fff4ecc99ed0b3792f63af7ac732", + "text": "the size,", "metadata": { + "languages": [ + "eng" + ], + 
"page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "the size,", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "18ddc61212b977693c3ab4a9e2a98213", + "text": "respectively. For example,", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "respectively. 
For example,", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "f5af2f4ccedef8e9c9222943207ddce1", + "text": "the problem instance,", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "the problem instance,", - "type": "UncategorizedText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "275e61db64667898a0ec65d6cbbff69b", + "text": "For each problem instance, the following information is provided: The number of depots m\u00f0 The number of trips \u00f0n\u00de, The number of locations \u00f0l\u00de, The number of vehicles at each depot, For each trip i A 1; 2; \u2026; n, a start time, ts", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "For each problem instance, the following information is provided: The number of depots mð The number of trips ðnÞ, The number of locations ðlÞ, The number of vehicles at each depot, For each trip i A 1; 2; …; n, a start time, ts", - "type": "NarrativeText" + ] + } + } }, { + "type": "UncategorizedText", 
"element_id": "f1d7de16fe466b5c9f0396600da6d3ef", + "text": "\u00de,", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "Þ,", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "d07db900a92fbc399e2eac5e0fc704ee", + "text": "i , a start location, ls", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "i , a start location, ls", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "3c0009859c6faa133b3e59b1b5c42c5b", + "text": "i , an end time, te", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - 
"page_number": 2 - }, - "text": "i , an end time, te", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "4f3baeb46b82b7cb0acec9e6b9ac9787", + "text": "i , and an end location, le i ,", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "i , and an end location, le i ,", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "84e91ae08f7e4ae8996bb4cdbbfb9e32", + "text": "and", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "and", - "type": "Title" + ] + } + } }, { + "type": "NarrativeText", "element_id": "9e7301ebb3fd5cbe1410901ea78c02db", + "text": "(cid:2) The travel time, \u03b4ij, between any two locations i; j A 1; \u2026; l.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - 
"url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "(cid:2) The travel time, δij, between any two locations i; j A 1; …; l.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "5e73cd663ab2449350114f86e23f6bbb", + "text": "All times are in minutes and integers. The planning duration is from 5 a.m. to around midnight. Each instance has two classes of trips, short trips and long trips, with 40% short trips and 60% long trips. The duration of a short trip is less than a total of 45 min and the travel time between the start", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "All times are in minutes and integers. The planning duration is from 5 a.m. to around midnight. Each instance has two classes of trips, short trips and long trips, with 40% short trips and 60% long trips. 
The duration of a short trip is less than a total of 45 min and the travel time between the start", - "type": "NarrativeText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "87149858e00c98f10a2b08be1b8d584a", + "text": "486", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "486", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Header", "element_id": "5fc26c03275c46c5eb2ae66c0c288d2b", + "text": "S. Kulkarni et al. / Data in Brief 22 (2019) 484\u2013487", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "S. Kulkarni et al. / Data in Brief 22 (2019) 484–487", - "type": "Header" + ] + } + } }, { + "type": "NarrativeText", "element_id": "a3a97226d270316d06712c89f7ff489d", + "text": "and end location of the trip. A long trip is about 3\u20135 h in duration and has the same start and end location. 
For all instances, m r l and the locations 1; \u2026; m correspond to depots, while the remaining locations only appear as trip start and end locations.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "and end location of the trip. A long trip is about 3–5 h in duration and has the same start and end location. For all instances, m r l and the locations 1; …; m correspond to depots, while the remaining locations only appear as trip start and end locations.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "36bb62577b390f929d88ed7d004c1e3e", + "text": "i \u00fe\u03b4", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "i þδ", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "51071653fbb405a5c84831cbacc6c618", + "text": ". 
If le i ls le i j , otherwise, the vehicle may require waiting at le i for the duration of \u00f0ts", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": ". If le i ls le i j , otherwise, the vehicle may require waiting at le i for the duration of ðts", - "type": "NarrativeText" + ] + } + } }, { + "type": "Title", "element_id": "edff69ec864e554eb9aee86908ecac9c", + "text": "Z te", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "Z te", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "f038d089ae51f445f96217852ae9c670", + "text": "a ls", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - 
"filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "a ls", - "type": "Title" + ] + } + } }, { + "type": "NarrativeText", "element_id": "5066fe5d8ca5d5f91f7312ec35a9a7e8", + "text": "A trip j can be covered after trip i by the same vehicle, if ts j", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "A trip j can be covered after trip i by the same vehicle, if ts j", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "d3b130ec44c8f5b0865012570fe82fd0", + "text": "j , the vehicle must travel empty from le j (cid:3)te i \u00de. A schedule is given by the sequence in which a vehicle can cover the trips. The MDVSP is to determine the minimum number of schedules to cover all trips that minimizes total time in waiting and empty travel. The following requirements must be satis\ufb01ed:", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "j , the vehicle must travel empty from le j (cid:3)te i Þ. 
A schedule is given by the sequence in which a vehicle can cover the trips. The MDVSP is to determine the minimum number of schedules to cover all trips that minimizes total time in waiting and empty travel. The following requirements must be satisfied:", - "type": "NarrativeText" + ] + } + } }, { + "type": "Title", "element_id": "871530d7bbaa529bbc177fc2a041720e", + "text": "j", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "j", - "type": "Title" + ] + } + } }, { + "type": "NarrativeText", "element_id": "bfd40d52e047822b7bc341a4741f1f73", + "text": "i to ls", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "i to ls", - "type": "NarrativeText" + ] + } + } }, { + "type": "ListItem", "element_id": "a8f50afa154ed8c4545362eeb8ca5799", + "text": "1. Each schedule should start and end at the same depot. 2. Each trip should be covered by only one vehicle. 3. 
The number of schedules that start from a depot should not exceed the number of vehicles at the depot.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "1. Each schedule should start and end at the same depot. 2. Each trip should be covered by only one vehicle. 3. The number of schedules that start from a depot should not exceed the number of vehicles at the depot.", - "type": "ListItem" + ] + } + } }, { + "type": "NarrativeText", "element_id": "80d7ee3f1337fffbcb42c78e218d8aad", + "text": "A suf\ufb01cient number of vehicles are provided to maintain the feasibility of an instance. For each instance size \u00f0m; n\u00de, Table 1 provides the average of the number of locations, the number of times, the number of vehicles, and the number of possible empty travels, over \ufb01ve instances. The number of locations includes m distinct locations for depots and the number of locations at which various trips start or end. The number of times includes the start and the end time of the planning horizon and the start/end times for the trips. The number of vehicles is the total number of vehicles from all the depots. 
The number of possible empty travels is the number of possible connections between trips that require a vehicle travelling empty between two consecutive trips in a schedule.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "A sufficient number of vehicles are provided to maintain the feasibility of an instance. For each instance size ðm; nÞ, Table 1 provides the average of the number of locations, the number of times, the number of vehicles, and the number of possible empty travels, over five instances. The number of locations includes m distinct locations for depots and the number of locations at which various trips start or end. The number of times includes the start and the end time of the planning horizon and the start/end times for the trips. The number of vehicles is the total number of vehicles from all the depots. The number of possible empty travels is the number of possible connections between trips that require a vehicle travelling empty between two consecutive trips in a schedule.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "2c71b28268ae79e366c8190e28761e31", + "text": "The description of the \ufb01le for each problem instance is presented in Table 2. The \ufb01rst line in the \ufb01le provides the number of depots \u00f0m\u00de, the number of trips, \u00f0n\u00de, and the number of locations \u00f0l\u00de, in the problem instance. The next n lines present the information for n trips. 
Each line corresponds to a trip, i A 1; \u2026; n g, and provides the start location, the start time, the end location, and the end time of trip i. The next l lines present the travel times between any two locations, i; jA 1; \u2026; l", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "The description of the file for each problem instance is presented in Table 2. The first line in the file provides the number of depots ðmÞ, the number of trips, ðnÞ, and the number of locations ðlÞ, in the problem instance. The next n lines present the information for n trips. Each line corresponds to a trip, i A 1; …; n g, and provides the start location, the start time, the end location, and the end time of trip i. 
The next l lines present the travel times between any two locations, i; jA 1; …; l", - "type": "NarrativeText" + ] + } + } }, { + "type": "Title", "element_id": "924fc12bebb375f9c74313489cf16217", + "text": "f", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "f", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "028c5c64e9591944e620e8308f516b5a", + "text": "(cid:1)", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "(cid:1)", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "ce73daceb6d992f6af62cceb4a3d424f", + "text": "(cid:3)", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "(cid:3)", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "4c3e98e95e0007df7a9e116f5df403c8", + "text": ".", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": ".", - "type": "UncategorizedText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "7c74ad0f1b0edb685ba951da2a788af8", + "text": "The dataset also includes a program \u2018GenerateInstance.cpp\u2019 that can be used to generate new instances. The program takes three inputs, the number of depots \u00f0m\u00de, the number of trips \u00f0n\u00de, and the number of instances for each size \u00f0m; n\u00de.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "The dataset also includes a program ‘GenerateInstance.cpp’ that can be used to generate new instances. 
The program takes three inputs, the number of depots ðmÞ, the number of trips ðnÞ, and the number of instances for each size ðm; nÞ.", - "type": "NarrativeText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "155c4752aa12e6b82164f5ac49103a19", + "text": "Table 1 Average number of locations, times, vehicles and empty travels for each instance size.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "Table 1 Average number of locations, times, vehicles and empty travels for each instance size.", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "6d92abd137f1e1a6f7d9ecfa1edb0cf4", + "text": "Instance size (m, n)", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "Instance size (m, n)", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "bcd163c5719297fd86b9eebacf8a9c24", + "text": "Average number of", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + 
"path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "Average number of", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "204a9747099a8efd4aa0b05c9e5c38d2", + "text": "Locations", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "Locations", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "327cb3d0fb60857fee3d8f0c2c78d613", + "text": "Times", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "Times", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "6592bb72dcd3912aa6fabc3df525aeda", + "text": "Vehicles", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": 
"application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "Vehicles", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "80ce4476651a7ac735c554343aeb749f", + "text": "Possible empty travels", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "Possible empty travels", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "71a7492ba9c12eef52065aabaebc3a7c", + "text": "(8, 1500) (8, 2000) (8, 2500) (8, 3000) (12, 1500) (12, 2000) (12, 2500) (12, 3000) (16, 1500) (16, 2000) (16, 2500) (16, 3000)", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "(8, 1500) (8, 
2000) (8, 2500) (8, 3000) (12, 1500) (12, 2000) (12, 2500) (12, 3000) (16, 1500) (16, 2000) (16, 2500) (16, 3000)", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "7701857f59bdba5844b24edc32749d05", + "text": "568.40 672.80 923.40 977.00 566.00 732.60 875.00 1119.60 581.80 778.00 879.00 1087.20", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "568.40 672.80 923.40 977.00 566.00 732.60 875.00 1119.60 581.80 778.00 879.00 1087.20", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "2bf95679e315fbbd9f0ceb0ce36d9197", + "text": "975.20 1048.00 1078.00 1113.20 994.00 1040.60 1081.00 1107.40 985.40 1040.60 1083.20 1101.60", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "975.20 1048.00 1078.00 1113.20 994.00 1040.60 1081.00 1107.40 985.40 1040.60 1083.20 1101.60", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "da4ae500af3e46e7446a28cddd32679c", + "text": "652.20 857.20 
1082.40 1272.80 642.00 861.20 1096.00 1286.20 667.80 872.40 1076.40 1284.60", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "652.20 857.20 1082.40 1272.80 642.00 861.20 1096.00 1286.20 667.80 872.40 1076.40 1284.60", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "e21d6005188c8a7bfcb95e42868b986c", + "text": "668,279.40 1,195,844.80 1,866,175.20 2,705,617.00 674,191.00 1,199,659.80 1,878,745.20 2,711,180.40 673,585.80 1,200,560.80 1,879,387.00 2,684,983.60", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "668,279.40 1,195,844.80 1,866,175.20 2,705,617.00 674,191.00 1,199,659.80 1,878,745.20 2,711,180.40 673,585.80 1,200,560.80 1,879,387.00 2,684,983.60", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Header", "element_id": "fa23407a7c3c99ae3b6fb79034698807", + "text": "S. Kulkarni et al. 
/ Data in Brief 22 (2019) 484\u2013487", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "S. Kulkarni et al. / Data in Brief 22 (2019) 484–487", - "type": "Header" + ] + } + } }, { + "type": "Title", "element_id": "0a4152d3ee312a3d28cc2b63d6f59a6e", + "text": "Table 2 Description of \ufb01le format for each problem instance.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "Table 2 Description of file format for each problem instance.", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "d66486bdc6e5b4d6e2018f7da6d0b0d0", + "text": "Number of lines", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" 
- }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "Number of lines", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "6c56043a98b068693db3cd6ded0bc020", + "text": "Number of columns in each line", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "Number of columns in each line", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "2fc6800b1896d3d2779ee6e98794bdb1", + "text": "Description", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "Description", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "a5efd069cfcb8d3c983dfab2b9336b0e", + "text": "1 1 n", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "1 1 n", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "1d96bbba9ffa9a12e81da0426f80a9fc", + "text": "l", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "l", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "25f80b4c6652f9af1a6883a6e4b8c0bb", + "text": "3 m 4", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "3 m 4", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "516ec572955aa07f031d27cc89008615", + "text": "l", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, 
"permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "l", - "type": "Title" + ] + } + } }, { + "type": "NarrativeText", "element_id": "ebd5a6aeac91e0f42fecb980ef4a648a", + "text": "The number of depots, the number of trips, and the number of locations. The number of vehicles rd at each depot d. One line for each trip, i \u00bc 1; 2; \u2026; n. Each line provides the start location ls time ts i and the end time te i for the corresponding trip. Each element, \u03b4ij; where i; j A 1; 2; \u2026; l, refers to the travel time between location i and location j.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "The number of depots, the number of trips, and the number of locations. The number of vehicles rd at each depot d. One line for each trip, i ¼ 1; 2; …; n. Each line provides the start location ls time ts i and the end time te i for the corresponding trip. 
Each element, δij; where i; j A 1; 2; …; l, refers to the travel time between location i and location j.", - "type": "NarrativeText" + ] + } + } }, { + "type": "Title", "element_id": "50fb8c466c52d5ae755055ffc24a418d", + "text": "i , the start", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "i , the start", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "44a4c21af61b74e9f30be3112d9eb1e7", + "text": "i , the end location le", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "i , the end location le", - "type": "Title" + ] + } + } }, { + "type": "ListItem", "element_id": "0f605e650a81abc6b5a30423d60d0975", + "text": "2. 
Experimental design, materials, and methods", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "2. Experimental design, materials, and methods", - "type": "ListItem" + ] + } + } }, { + "type": "NarrativeText", "element_id": "37200c447b8f7e1443b707c1e76e66b0", + "text": "The procedure presented by Carpaneto et al. in [1] is used to generate the problem instances. The same procedure has been used by Pepin et al. in [4] to generate the benchmark dataset of the MDVSP. A detailed description of the procedure is presented in [3].", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "The procedure presented by Carpaneto et al. in [1] is used to generate the problem instances. The same procedure has been used by Pepin et al. in [4] to generate the benchmark dataset of the MDVSP. 
A detailed description of the procedure is presented in [3].", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "92e466c917445c0d473eea592acc3b72", + "text": "Our dataset provides start/end location and time of trips as well as the travel time between any two locations. The location and time information is required to model the MDVSP on a time-space network. The feasible connections and the cost of connections between the trips can be obtained as discussed in [3]. Thus, the dataset has all the information that is required to model the MDVSP on the time-space network (see [2]) as well as the connection-network (see [5]). The benchmark solutions for all the problem instances are presented in [3].", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "Our dataset provides start/end location and time of trips as well as the travel time between any two locations. The location and time information is required to model the MDVSP on a time-space network. The feasible connections and the cost of connections between the trips can be obtained as discussed in [3]. Thus, the dataset has all the information that is required to model the MDVSP on the time-space network (see [2]) as well as the connection-network (see [5]). The benchmark solutions for all the problem instances are presented in [3].", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "d89dfb5247b731abfe90aedc46c09806", + "text": "Transparency document. 
Supporting information", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "Transparency document. Supporting information", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "9a157bb2a3ee3ac55ecf743df0020ce9", + "text": "Transparency document associated with this article can be found in the online version at https://doi.", "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", "languages": [ "eng" ], "links": [ { - "start_index": 94, "text": ":// doi", - "url": "https://doi.org/10.1016/j.dib.2018.12.055" + "url": "https://doi.org/10.1016/j.dib.2018.12.055", + "start_index": 94 } ], - "page_number": 4 - }, - "text": "Transparency document associated with this article can be found in the online version at https://doi.", - "type": "NarrativeText" - }, - { - "element_id": "fb1ccb68103598fae7cc8128c97711d9", - "metadata": { + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", + ] + } + } + 
}, + { + "type": "UncategorizedText", + "element_id": "fb1ccb68103598fae7cc8128c97711d9", + "text": "org/10.1016/j.dib.2018.12.055.", + "metadata": { "languages": [ "eng" ], "links": [ { - "start_index": 0, "text": "org / 10 . 1016 / j . dib . 2018 . 12 . 055", - "url": "https://doi.org/10.1016/j.dib.2018.12.055" + "url": "https://doi.org/10.1016/j.dib.2018.12.055", + "start_index": 0 } ], - "page_number": 4 - }, - "text": "org/10.1016/j.dib.2018.12.055.", - "type": "UncategorizedText" - }, - { - "element_id": "a63064fd9987765c33c9d20047dc2f15", - "metadata": { + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "References", - "type": "Title" + ] + } + } }, { - "element_id": "909007a841d32eb20886f7fc2d923911", + "type": "Title", + "element_id": "a63064fd9987765c33c9d20047dc2f15", + "text": "References", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "909007a841d32eb20886f7fc2d923911", + "text": "[1] G. Carpaneto, M. Dell'Amico, M. Fischetti, P. 
Toth, A branch and bound algorithm for the multiple depot vehicle scheduling", + "metadata": { "languages": [ "eng" ], "links": [ { - "start_index": 4, "text": "G . Carpaneto , M . Dell ' Amico , M . Fischetti , P . Toth ,", - "url": "http://refhub.elsevier.com/S2352-3409(18)31594-4/sbref1" + "url": "http://refhub.elsevier.com/S2352-3409(18)31594-4/sbref1", + "start_index": 4 } ], - "page_number": 4 - }, - "text": "[1] G. Carpaneto, M. Dell'Amico, M. Fischetti, P. Toth, A branch and bound algorithm for the multiple depot vehicle scheduling", - "type": "NarrativeText" - }, - { - "element_id": "b1902a32b19337484e93efd9509a07c1", - "metadata": { + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", + ] + } + } + }, + { + "type": "UncategorizedText", + "element_id": "b1902a32b19337484e93efd9509a07c1", + "text": "problem, Networks 19 (5) (1989) 531\u2013548.", + "metadata": { "languages": [ "eng" ], "links": [ { - "start_index": 0, - "text": "problem , Networks19 ( 5 )( 1989 ) 531 –", - "url": "http://refhub.elsevier.com/S2352-3409(18)31594-4/sbref1" + "text": "problem , Networks19 ( 5 )( 1989 ) 531 \u2013", + "url": "http://refhub.elsevier.com/S2352-3409(18)31594-4/sbref1", + "start_index": 0 }, { - "start_index": 0, - "text": "problem , Networks19 ( 5 )( 1989 ) 531 – 548", - "url": "http://refhub.elsevier.com/S2352-3409(18)31594-4/sbref1" + "text": "problem , Networks19 ( 5 )( 1989 ) 531 \u2013 548", + "url": "http://refhub.elsevier.com/S2352-3409(18)31594-4/sbref1", + "start_index": 0 } ], - "page_number": 4 - }, - "text": "problem, Networks 19 (5) (1989) 531–548.", - "type": "UncategorizedText" 
- }, - { - "element_id": "5a7cc4a5afb4c97c546a3b64cb4f593f", - "metadata": { + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "5a7cc4a5afb4c97c546a3b64cb4f593f", + "text": "[2] N. Kliewer, T. Mellouli, L. Suhl, A time\u2013space network based exact optimization model for multi-depot bus scheduling, Eur.", + "metadata": { "languages": [ "eng" ], "links": [ { - "start_index": 4, - "text": "N . Kliewer , T . Mellouli , L . Suhl , Atime – spacenetworkbasedexactoptimizationmodelformulti - depotbusscheduling , Eur", - "url": "http://refhub.elsevier.com/S2352-3409(18)31594-4/sbref2" + "text": "N . Kliewer , T . Mellouli , L . Suhl , Atime \u2013 spacenetworkbasedexactoptimizationmodelformulti - depotbusscheduling , Eur", + "url": "http://refhub.elsevier.com/S2352-3409(18)31594-4/sbref2", + "start_index": 4 } ], - "page_number": 4 - }, - "text": "[2] N. Kliewer, T. Mellouli, L. 
Suhl, A time–space network based exact optimization model for multi-depot bus scheduling, Eur.", - "type": "NarrativeText" - }, - { - "element_id": "6a1cb7145ede91c5d2e6bb53b4d59f65", - "metadata": { + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", + ] + } + } + }, + { + "type": "UncategorizedText", + "element_id": "6a1cb7145ede91c5d2e6bb53b4d59f65", + "text": "J. Oper. Res. 175 (3) (2006) 1616\u20131627.", + "metadata": { "languages": [ "eng" ], "links": [ { - "start_index": 0, - "text": "J . Oper . Res . 175 ( 3 )( 2006 ) 1616 –", - "url": "http://refhub.elsevier.com/S2352-3409(18)31594-4/sbref2" + "text": "J . Oper . Res . 175 ( 3 )( 2006 ) 1616 \u2013", + "url": "http://refhub.elsevier.com/S2352-3409(18)31594-4/sbref2", + "start_index": 0 }, { - "start_index": 0, - "text": "J . Oper . Res . 175 ( 3 )( 2006 ) 1616 – 1627", - "url": "http://refhub.elsevier.com/S2352-3409(18)31594-4/sbref2" + "text": "J . Oper . Res . 175 ( 3 )( 2006 ) 1616 \u2013 1627", + "url": "http://refhub.elsevier.com/S2352-3409(18)31594-4/sbref2", + "start_index": 0 } ], - "page_number": 4 - }, - "text": "J. Oper. Res. 
175 (3) (2006) 1616–1627.", - "type": "UncategorizedText" - }, - { - "element_id": "439a02aad982d445100cc246cd066b53", - "metadata": { + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", + ] + } + } + }, + { + "type": "UncategorizedText", + "element_id": "439a02aad982d445100cc246cd066b53", + "text": "[3] S. Kulkarni, M. Krishnamoorthy, A. Ranade, A.T. Ernst, R. Patil, A new formulation and a column generation-based heuristic", + "metadata": { "languages": [ "eng" ], "links": [ { - "start_index": 4, "text": "S . Kulkarni , M . Krishnamoorthy , A . Ranade , A . T . Ernst , R . Patil , Anewformulationandacolumngeneration -", - "url": "http://refhub.elsevier.com/S2352-3409(18)31594-4/sbref3" + "url": "http://refhub.elsevier.com/S2352-3409(18)31594-4/sbref3", + "start_index": 4 } ], - "page_number": 4 - }, - "text": "[3] S. Kulkarni, M. Krishnamoorthy, A. Ranade, A.T. Ernst, R. 
Patil, A new formulation and a column generation-based heuristic", - "type": "UncategorizedText" - }, - { - "element_id": "46a8bd54aa6c1bd32118f4a681faaec9", - "metadata": { + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "46a8bd54aa6c1bd32118f4a681faaec9", + "text": "for the multiple depot vehicle scheduling problem, Transp. Res. Part B Methodol. 118 (2018) 457\u2013487.", + "metadata": { "languages": [ "eng" ], "links": [ { - "start_index": 0, - "text": "forthemultipledepotvehicleschedulingproblem , Transp . Res . PartBMethodol . 118 ( 2018 ) 457 –", - "url": "http://refhub.elsevier.com/S2352-3409(18)31594-4/sbref3" + "text": "forthemultipledepotvehicleschedulingproblem , Transp . Res . PartBMethodol . 118 ( 2018 ) 457 \u2013", + "url": "http://refhub.elsevier.com/S2352-3409(18)31594-4/sbref3", + "start_index": 0 }, { - "start_index": 0, - "text": "forthemultipledepotvehicleschedulingproblem , Transp . Res . PartBMethodol . 118 ( 2018 ) 457 – 487", - "url": "http://refhub.elsevier.com/S2352-3409(18)31594-4/sbref3" + "text": "forthemultipledepotvehicleschedulingproblem , Transp . Res . PartBMethodol . 118 ( 2018 ) 457 \u2013 487", + "url": "http://refhub.elsevier.com/S2352-3409(18)31594-4/sbref3", + "start_index": 0 } ], - "page_number": 4 - }, - "text": "for the multiple depot vehicle scheduling problem, Transp. Res. Part B Methodol. 
118 (2018) 457–487.", - "type": "NarrativeText" - }, - { - "element_id": "f60e59177f5f0e53e3f285fa68a8e3ef", - "metadata": { + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", + ] + } + } + }, + { + "type": "NarrativeText", + "element_id": "f60e59177f5f0e53e3f285fa68a8e3ef", + "text": "[4] A.S. Pepin, G. Desaulniers, A. Hertz, D. Huisman, A comparison of \ufb01ve heuristics for the multiple depot vehicle scheduling", + "metadata": { "languages": [ "eng" ], "links": [ { - "start_index": 4, "text": "A . S . Pepin , G . Desaulniers , A . Hertz , D . Huisman ,", - "url": "http://refhub.elsevier.com/S2352-3409(18)31594-4/sbref4" + "url": "http://refhub.elsevier.com/S2352-3409(18)31594-4/sbref4", + "start_index": 4 } ], - "page_number": 4 - }, - "text": "[4] A.S. Pepin, G. Desaulniers, A. Hertz, D. Huisman, A comparison of five heuristics for the multiple depot vehicle scheduling", - "type": "NarrativeText" - }, - { - "element_id": "0f8229a10050ec65ae5b6f9f66c6ca47", - "metadata": { + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", + ] + } + } + }, + { + "type": "UncategorizedText", + "element_id": "0f8229a10050ec65ae5b6f9f66c6ca47", + "text": "problem, J. Sched. 
12 (1) (2009) 17.", + "metadata": { "languages": [ "eng" ], "links": [ { - "start_index": 0, "text": "problem , J . Sched . 12 ( 1 )( 2009 ) 17", - "url": "http://refhub.elsevier.com/S2352-3409(18)31594-4/sbref4" + "url": "http://refhub.elsevier.com/S2352-3409(18)31594-4/sbref4", + "start_index": 0 } ], - "page_number": 4 - }, - "text": "problem, J. Sched. 12 (1) (2009) 17.", - "type": "UncategorizedText" - }, - { - "element_id": "9f411677c0a8ddb06047e600b348e282", - "metadata": { + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", + ] + } + } + }, + { + "type": "UncategorizedText", + "element_id": "9f411677c0a8ddb06047e600b348e282", + "text": "[5] C.C. Ribeiro, F. Soumis, A column generation approach to the multiple-depot vehicle scheduling problem, Oper. Res. 42 (1)", + "metadata": { "languages": [ "eng" ], "links": [ { - "start_index": 4, "text": "C . C . Ribeiro , F . Soumis , Acolumngenerationapproachtothemultiple - depotvehicleschedulingproblem , Oper . Res . 42 ( 1", - "url": "http://refhub.elsevier.com/S2352-3409(18)31594-4/sbref5" + "url": "http://refhub.elsevier.com/S2352-3409(18)31594-4/sbref5", + "start_index": 4 } ], - "page_number": 4 - }, - "text": "[5] C.C. Ribeiro, F. Soumis, A column generation approach to the multiple-depot vehicle scheduling problem, Oper. Res. 
42 (1)", - "type": "UncategorizedText" - }, - { - "element_id": "e37f78c7271830eb805f560368fec7cc", - "metadata": { + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", + ] + } + } + }, + { + "type": "UncategorizedText", + "element_id": "e37f78c7271830eb805f560368fec7cc", + "text": "(1994) 41\u201352.", + "metadata": { "languages": [ "eng" ], "links": [ { - "start_index": 0, - "text": "( 1994 ) 41 –", - "url": "http://refhub.elsevier.com/S2352-3409(18)31594-4/sbref5" + "text": "( 1994 ) 41 \u2013", + "url": "http://refhub.elsevier.com/S2352-3409(18)31594-4/sbref5", + "start_index": 0 }, { - "start_index": 0, - "text": "( 1994 ) 41 – 52", - "url": "http://refhub.elsevier.com/S2352-3409(18)31594-4/sbref5" + "text": "( 1994 ) 41 \u2013 52", + "url": "http://refhub.elsevier.com/S2352-3409(18)31594-4/sbref5", + "start_index": 0 } ], - "page_number": 4 - }, - "text": "(1994) 41–52.", - "type": "UncategorizedText" + "page_number": 4, + "filetype": "application/pdf", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } }, { + "type": "UncategorizedText", "element_id": "94e316e08a4a19eed59d29d5d58703ce", + "text": "487", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" + }, "permissions_data": [ { "mode": 33188 } - 
], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-api/75/29/main.PMC6312793.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "487", - "type": "UncategorizedText" + ] + } + } } ] \ No newline at end of file diff --git a/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/biomed-path/07/07/sbaa031.073.PMC7234218.pdf.json b/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/biomed-path/07/07/sbaa031.073.PMC7234218.json similarity index 63% rename from test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/biomed-path/07/07/sbaa031.073.PMC7234218.pdf.json rename to test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/biomed-path/07/07/sbaa031.073.PMC7234218.json index 282191fd06..ed4d55b17c 100644 --- a/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/biomed-path/07/07/sbaa031.073.PMC7234218.pdf.json +++ b/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/biomed-path/07/07/sbaa031.073.PMC7234218.json @@ -1,282 +1,310 @@ [ { + "type": "Header", "element_id": "13c2cd4a987063cb9fe6802f8d9d8bba", + "text": "S32", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-path/07/07/sbaa031.073.PMC7234218.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-path/07/07/sbaa031.073.PMC7234218.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "S32", - "type": "Header" + ] + } + } }, { + "type": "NarrativeText", "element_id": "6e95de55fbc805ac11d5e168881e41eb", + "text": "ns; 40 mg/day=3.6%, p<0.05; 80 mg/day=4.9%, p<0.01; 120 mg/day=9.3%, 
p<0.001, PM dosing group: 20 mg/day=-0.4%, ns; 40 mg/day=2.8%, p<0.05: 80 mg/day=0.2%, ns; 160 mg/day=5.8%, p<0.05). There was no clear dose-dependent trend associated with nausea and RD was similar between AM and PM dosing group (AM dosing group: 20 mg/ day=0.2% ns; 40 mg/day=3.8%, p<0.05; 80 mg/day=3.8%, ns; 120 mg/ day=6.6%, ns, PM dosing group: 20 mg/day=-1.6%, ns; 40 mg/day=-1.7%, ns; 80 mg/day=5.5%, p<0.01; 160 mg/day=2.8%, ns). Discussion: The risk of adverse events in the treatment of schizophrenia with lurasidone can vary depending on the timing of administration. In particular, for akathisia and somnolence, the incidence risks were reduced when lurasidone was administered in PM. Unlike with AM administration, the dose-dependence in the risks of these adverse events were not observed in lurasidone PM administration. The timing of lurasidone administration could be considered in effort to minimize potential adverse events.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-path/07/07/sbaa031.073.PMC7234218.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-path/07/07/sbaa031.073.PMC7234218.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "ns; 40 mg/day=3.6%, p<0.05; 80 mg/day=4.9%, p<0.01; 120 mg/day=9.3%, p<0.001, PM dosing group: 20 mg/day=-0.4%, ns; 40 mg/day=2.8%, p<0.05: 80 mg/day=0.2%, ns; 160 mg/day=5.8%, p<0.05). 
There was no clear dose-dependent trend associated with nausea and RD was similar between AM and PM dosing group (AM dosing group: 20 mg/ day=0.2% ns; 40 mg/day=3.8%, p<0.05; 80 mg/day=3.8%, ns; 120 mg/ day=6.6%, ns, PM dosing group: 20 mg/day=-1.6%, ns; 40 mg/day=-1.7%, ns; 80 mg/day=5.5%, p<0.01; 160 mg/day=2.8%, ns). Discussion: The risk of adverse events in the treatment of schizophrenia with lurasidone can vary depending on the timing of administration. In particular, for akathisia and somnolence, the incidence risks were reduced when lurasidone was administered in PM. Unlike with AM administration, the dose-dependence in the risks of these adverse events were not observed in lurasidone PM administration. The timing of lurasidone administration could be considered in effort to minimize potential adverse events.", - "type": "NarrativeText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "c0ad446ac0e663713724aa5f42d20448", + "text": "S6. SLEEP ENDOPHENOTYPES OF SCHIZOPHRENIA: A HIGH-DENSITY EEG STUDY IN DRUG-NA\u00cfVE, FIRST EPISODE PSYCHOSIS PATIENTS", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-path/07/07/sbaa031.073.PMC7234218.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-path/07/07/sbaa031.073.PMC7234218.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "S6. 
SLEEP ENDOPHENOTYPES OF SCHIZOPHRENIA: A HIGH-DENSITY EEG STUDY IN DRUG-NAÏVE, FIRST EPISODE PSYCHOSIS PATIENTS", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "21facf77763c3e990a3db1b8626c133a", + "text": "Anna Castelnovo1, Cecilia Casetta2, Francesco Donati3, Renata del Giudice3, Caroline Zangani3, Simone Sarasso3, Armando D\u2019Agostino*3 1Faculty of Biomedical Sciences, Universit\u00e0 della Svizzera Italiana, Switzerland; 2Institute of Psychiatry, Psychology and Neuroscience, King\u2019s College London, England; 3Universit\u00e0 degli Studi di Milano, Italy", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-path/07/07/sbaa031.073.PMC7234218.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-path/07/07/sbaa031.073.PMC7234218.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "Anna Castelnovo1, Cecilia Casetta2, Francesco Donati3, Renata del Giudice3, Caroline Zangani3, Simone Sarasso3, Armando D’Agostino*3 1Faculty of Biomedical Sciences, Università della Svizzera Italiana, Switzerland; 2Institute of Psychiatry, Psychology and Neuroscience, King’s College London, England; 3Università degli Studi di Milano, Italy", - "type": "UncategorizedText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "26b6989522e94c2c7ef5c2633e41cf72", + "text": "Background: Slow waves, the hallmark of the deep nonrapid eye move- ment sleep electroencephalogram (EEG), are critical for restorative sleep and brain plasticity. 
They arise from the synchronous depolarization and hyperpolarization of millions of cortical neurons and their proper gen- eration and propagation relies upon the integrity of widespread cortico- thalamic networks. Slow wave abnormalities have been reported in patient with Schizophrenia, although with partially contradictory results, probably related to antipsychotic and sedative medications. Recently, their presence and delineation, have been convincingly shown in first-episode psychosis patients (FEP). However, clear evidence of this biomarker at the onset of the disease, prior to any psychopharmacological intervention, remains limited. Moreover, no attempt has been made to elucidate the prognostic meaning of this finding. Methods: We collected whole night sleep high\u2013density electroencephalog- raphy recordings (64-channel BrainAmp, Brain Products GmbH, Gilching, Germany) in 20 drug-naive FEP patients and 20 healthy control subjects (HC). Several clinical psychometric scales as well as neurocognitive tests were administered to all subjects in order to better define psychopatholog- ical status and vulnerability. EEG slow wave activity (SWA, spectral power between 1 and 4 Hz) and several slow wave parameters were computed at each electrode location, including density and amplitude, at each electrode location. Along with a group analysis between FEP and HC, a subgroup analysis was also computed between patients who showed a progression of symptoms to full-blown Schizophrenia (SCZ, n = 10) over the next 12-month follow-up and those who did not (OTH, n = 10). Results: Sleep macro-architecture was globally preserved in FEP patients. SWA (1\u20134 Hz) was lower in FEP compared to HC but this difference didn\u2019t reach statistical significance. Slow wave density was decreased in FEP compared to HC, with a significance that survived multiple comparison correction over a large fronto-central cluster. Mean amplitude was pre- served. 
At the subgroup analysis, these results were largely driven by the subgroup of patients with a confirmed diagnosis of SCZ at a 12-month fol- low-up. Indeed, no difference could be found between OTH and HC, while a strong significance was still evident between SCZ and HC.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-path/07/07/sbaa031.073.PMC7234218.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-path/07/07/sbaa031.073.PMC7234218.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "Background: Slow waves, the hallmark of the deep nonrapid eye move- ment sleep electroencephalogram (EEG), are critical for restorative sleep and brain plasticity. They arise from the synchronous depolarization and hyperpolarization of millions of cortical neurons and their proper gen- eration and propagation relies upon the integrity of widespread cortico- thalamic networks. Slow wave abnormalities have been reported in patient with Schizophrenia, although with partially contradictory results, probably related to antipsychotic and sedative medications. Recently, their presence and delineation, have been convincingly shown in first-episode psychosis patients (FEP). However, clear evidence of this biomarker at the onset of the disease, prior to any psychopharmacological intervention, remains limited. Moreover, no attempt has been made to elucidate the prognostic meaning of this finding. Methods: We collected whole night sleep high–density electroencephalog- raphy recordings (64-channel BrainAmp, Brain Products GmbH, Gilching, Germany) in 20 drug-naive FEP patients and 20 healthy control subjects (HC). 
Several clinical psychometric scales as well as neurocognitive tests were administered to all subjects in order to better define psychopatholog- ical status and vulnerability. EEG slow wave activity (SWA, spectral power between 1 and 4 Hz) and several slow wave parameters were computed at each electrode location, including density and amplitude, at each electrode location. Along with a group analysis between FEP and HC, a subgroup analysis was also computed between patients who showed a progression of symptoms to full-blown Schizophrenia (SCZ, n = 10) over the next 12-month follow-up and those who did not (OTH, n = 10). Results: Sleep macro-architecture was globally preserved in FEP patients. SWA (1–4 Hz) was lower in FEP compared to HC but this difference didn’t reach statistical significance. Slow wave density was decreased in FEP compared to HC, with a significance that survived multiple comparison correction over a large fronto-central cluster. Mean amplitude was pre- served. At the subgroup analysis, these results were largely driven by the subgroup of patients with a confirmed diagnosis of SCZ at a 12-month fol- low-up. 
Indeed, no difference could be found between OTH and HC, while a strong significance was still evident between SCZ and HC.", - "type": "NarrativeText" + ] + } + } }, { + "type": "Footer", "element_id": "b38798d4ed1cda1c49ed2db924d40039", + "text": "SIRS 2020 Abstracts", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-path/07/07/sbaa031.073.PMC7234218.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-path/07/07/sbaa031.073.PMC7234218.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "SIRS 2020 Abstracts", - "type": "Footer" + ] + } + } }, { + "type": "Header", "element_id": "6681a3fc2e2bbc7efabbf221baaeec6b", + "text": "Poster Session I", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-path/07/07/sbaa031.073.PMC7234218.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-path/07/07/sbaa031.073.PMC7234218.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "Poster Session I", - "type": "Header" + ] + } + } }, { + "type": "NarrativeText", "element_id": "418368d1fe238e68fc6c8663f7485649", + "text": "Discussion: Our data confirm previous findings on reduced slow wave density in FEP, and expand them to acute subjects, before any treatment is prescribed. This is in line with available data on diffuse abnormalities of cortico-cortical and cortico-thalamic networks in these patients. 
Interestingly, our data also offer preliminary evidence that this deficit is specific for SCZ, as it appears to differentiate patients who developed SCZ from those with other diagnoses at follow-up. Given the traveling properties of slow waves, future research should establish their potential as markers of connectivity in SCZ.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-path/07/07/sbaa031.073.PMC7234218.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-path/07/07/sbaa031.073.PMC7234218.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "Discussion: Our data confirm previous findings on reduced slow wave density in FEP, and expand them to acute subjects, before any treatment is prescribed. This is in line with available data on diffuse abnormalities of cortico-cortical and cortico-thalamic networks in these patients. Interestingly, our data also offer preliminary evidence that this deficit is specific for SCZ, as it appears to differentiate patients who developed SCZ from those with other diagnoses at follow-up. Given the traveling properties of slow waves, future research should establish their potential as markers of connectivity in SCZ.", - "type": "NarrativeText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "2693595cd6fc5be02dc752b089f85eea", + "text": "S7. 
INVESTIGATING THE LINK BETWEEN THE PERIPHERAL ENDOCANNABINOID SYSTEM AND CENTRAL GLUTAMATERGIC NEUROTRANSMISSION IN EARLY PSYCHOSIS: A 7T-MRS STUDY", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-path/07/07/sbaa031.073.PMC7234218.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-path/07/07/sbaa031.073.PMC7234218.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "S7. INVESTIGATING THE LINK BETWEEN THE PERIPHERAL ENDOCANNABINOID SYSTEM AND CENTRAL GLUTAMATERGIC NEUROTRANSMISSION IN EARLY PSYCHOSIS: A 7T-MRS STUDY", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "3f2d8de4445801a7562416267c06a877", + "text": "Amedeo Minichino*1, Beata Godlewska1, Philip Cowen1, Philip Burnet1, Belinda Lennox1 1University of Oxford", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-path/07/07/sbaa031.073.PMC7234218.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-path/07/07/sbaa031.073.PMC7234218.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "Amedeo Minichino*1, Beata Godlewska1, Philip Cowen1, Philip Burnet1, Belinda Lennox1 1University of Oxford", - "type": "UncategorizedText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "741c946db28df5068fb60063dad37d27", + "text": "Background: Meta-analytic evidence showed increased levels of periph- eral 
endocannabinoid metabolites in psychotic illness. Alterations in the endocannabinoid system are believed to compromise glutamate and do- pamine transmission, which play a central role in pathophysiological models of psychosis. I will present preliminary data from an ongoing high-field proton magnetic resonance spectroscopy (MRS) study aimed at investigating the association between peripheral levels of endocannabinoid system metabolites and central glutamate metabolism in individuals at their first non-affective psychotic episode (NA-FEP) and healthy controls. Methods: We expect to recruit 17 NA-FEP and 20 healthy controls by January 2020. Currently, we recruited 12 NA-FEP and 18 healthy controls from two different research facilities (Imperial College London and University of Oxford) as part of a cross-sectional study. Participants un- derwent MRS scanning at 7-T with voxels placed in right dorsolateral prefrontal cortex (right-DLPFC), anterior cingulate cortex (ACC), and oc- cipital cortex. Neuro-metabolites will be calculated using the unsuppressed water signal as reference. Endocannabinoid metabolites were quantified from serum samples, collected during the same imaging session. Results: Analyses are ongoing. Based on previous evidence, expected findings are: (i) reduced glutamate levels in the ACC and right-DLPFC of NA-FEP compared to controls; (ii) increased peripheral endocannabinoid metabolites in NA-FEP compared to controls; and (iii) inverse association between peripheral endocannabinoid metabolites and glutamate levels in ACC and right-DLPFC in NA-FEP Discussion: This study will help clarifying the contribution of peripheral endocannabinoid system to central brain mechanisms of key relevance for psychotic illness. It will also add further evidence on the limited literature on high-resolution characterisation of brain metabolites in early psychosis. 
Strengths of the study include: (i) use of high-field MRS, which allows the estimation of glutamate-related compounds at higher precision than at lower field strength; (ii) reduced heterogeneity of the clinical sample (only male and NA-FEP). Limitations: small sample size and cross-sectional design.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-path/07/07/sbaa031.073.PMC7234218.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-path/07/07/sbaa031.073.PMC7234218.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "Background: Meta-analytic evidence showed increased levels of periph- eral endocannabinoid metabolites in psychotic illness. Alterations in the endocannabinoid system are believed to compromise glutamate and do- pamine transmission, which play a central role in pathophysiological models of psychosis. I will present preliminary data from an ongoing high-field proton magnetic resonance spectroscopy (MRS) study aimed at investigating the association between peripheral levels of endocannabinoid system metabolites and central glutamate metabolism in individuals at their first non-affective psychotic episode (NA-FEP) and healthy controls. Methods: We expect to recruit 17 NA-FEP and 20 healthy controls by January 2020. Currently, we recruited 12 NA-FEP and 18 healthy controls from two different research facilities (Imperial College London and University of Oxford) as part of a cross-sectional study. Participants un- derwent MRS scanning at 7-T with voxels placed in right dorsolateral prefrontal cortex (right-DLPFC), anterior cingulate cortex (ACC), and oc- cipital cortex. 
Neuro-metabolites will be calculated using the unsuppressed water signal as reference. Endocannabinoid metabolites were quantified from serum samples, collected during the same imaging session. Results: Analyses are ongoing. Based on previous evidence, expected findings are: (i) reduced glutamate levels in the ACC and right-DLPFC of NA-FEP compared to controls; (ii) increased peripheral endocannabinoid metabolites in NA-FEP compared to controls; and (iii) inverse association between peripheral endocannabinoid metabolites and glutamate levels in ACC and right-DLPFC in NA-FEP Discussion: This study will help clarifying the contribution of peripheral endocannabinoid system to central brain mechanisms of key relevance for psychotic illness. It will also add further evidence on the limited literature on high-resolution characterisation of brain metabolites in early psychosis. Strengths of the study include: (i) use of high-field MRS, which allows the estimation of glutamate-related compounds at higher precision than at lower field strength; (ii) reduced heterogeneity of the clinical sample (only male and NA-FEP). Limitations: small sample size and cross-sectional design.", - "type": "NarrativeText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "c1543aee0d7efb59052757f7b83a70a9", + "text": "S8. 
GRIN1 PROMOTER METHYLATION CHANGES IN BLOOD OF EARLY-ONSET PSYCHOTIC PATIENTS AND UNAFFECTED SIBLINGS WITH CHILDHOOD TRAUMA", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-path/07/07/sbaa031.073.PMC7234218.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-path/07/07/sbaa031.073.PMC7234218.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "S8. GRIN1 PROMOTER METHYLATION CHANGES IN BLOOD OF EARLY-ONSET PSYCHOTIC PATIENTS AND UNAFFECTED SIBLINGS WITH CHILDHOOD TRAUMA", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "5afb27a02de3e7a95c0f2fa442e32526", + "text": "Camila Loureiro*1, Corsi-Zuelli Fabiana1, Fachim Helene Aparecida1, Shuhama Rosana1, Menezes Paulo Rossi1, Dalton Caroline F2,", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-path/07/07/sbaa031.073.PMC7234218.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-path/07/07/sbaa031.073.PMC7234218.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "Camila Loureiro*1, Corsi-Zuelli Fabiana1, Fachim Helene Aparecida1, Shuhama Rosana1, Menezes Paulo Rossi1, Dalton Caroline F2,", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "0d80b62dd72121dd5263df8605849cf4", + "text": "AQ3", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", 
"data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-path/07/07/sbaa031.073.PMC7234218.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/biomed-path/07/07/sbaa031.073.PMC7234218.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "AQ3", - "type": "Title" + ] + } + } } ] \ No newline at end of file diff --git a/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/s3/small-pdf-set/2023-Jan-economic-outlook.pdf.json b/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/s3/2023-Jan-economic-outlook.json similarity index 54% rename from test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/s3/small-pdf-set/2023-Jan-economic-outlook.pdf.json rename to test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/s3/2023-Jan-economic-outlook.json index 10404f22ce..3a3676cfb0 100644 --- a/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/s3/small-pdf-set/2023-Jan-economic-outlook.pdf.json +++ b/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/s3/2023-Jan-economic-outlook.json @@ -1,4762 +1,5238 @@ [ { + "type": "Header", "element_id": "e5318630cd973733087506eca36a6be3", + "text": "INTERNATIONAL MONETARY FUND", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - 
"text": "INTERNATIONAL MONETARY FUND", - "type": "Header" + ] + } + } }, { + "type": "Title", "element_id": "8466f1c7e05ce04838ff95211c4fff50", + "text": "WORLD ECONOMIC OUTLOOK UPDATE Inflation Peaking amid Low Growth", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "WORLD ECONOMIC OUTLOOK UPDATE Inflation Peaking amid Low Growth", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "04fca18cc5aea2fdb24b55c01f4fa968", + "text": "2023 JAN", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "2023 JAN", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "d0ea1cbc2fc75a58c85e4f17f3edc488", + "text": "Inflation Peaking amid Low Growth", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } 
- ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "Inflation Peaking amid Low Growth", - "type": "Title" + ] + } + } }, { + "type": "ListItem", "element_id": "740d58adc74131ca7b1fba179c2ec112", + "text": "Global growth is projected to fall from an estimated 3.4 percent in 2022 to 2.9 percent in 2023, then rise to 3.1 percent in 2024. The forecast for 2023 is 0.2 percentage point higher than predicted in the October 2022 World Economic Outlook (WEO) but below the historical (2000\u201319) average of 3.8 percent. The rise in central bank rates to fight inflation and Russia\u2019s war in Ukraine continue to weigh on economic activity. The rapid spread of COVID-19 in China dampened growth in 2022, but the recent reopening has paved the way for a faster-than-expected recovery. Global inflation is expected to fall from 8.8 percent in 2022 to 6.6 percent in 2023 and 4.3 percent in 2024, still above pre-pandemic (2017\u201319) levels of about 3.5 percent.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "Global growth is projected to fall from an estimated 3.4 percent in 2022 to 2.9 percent in 2023, then rise to 3.1 percent in 2024. 
The forecast for 2023 is 0.2 percentage point higher than predicted in the October 2022 World Economic Outlook (WEO) but below the historical (2000–19) average of 3.8 percent. The rise in central bank rates to fight inflation and Russia’s war in Ukraine continue to weigh on economic activity. The rapid spread of COVID-19 in China dampened growth in 2022, but the recent reopening has paved the way for a faster-than-expected recovery. Global inflation is expected to fall from 8.8 percent in 2022 to 6.6 percent in 2023 and 4.3 percent in 2024, still above pre-pandemic (2017–19) levels of about 3.5 percent.", - "type": "ListItem" + ] + } + } }, { + "type": "ListItem", "element_id": "d30d4c1fc74320d326372cd65a30c0ce", + "text": "The balance of risks remains tilted to the downside, but adverse risks have moderated since the October 2022 WEO. On the upside, a stronger boost from pent-up demand in numerous economies or a faster fall in inflation are plausible. On the downside, severe health outcomes in China could hold back the recovery, Russia\u2019s war in Ukraine could escalate, and tighter global financing conditions could worsen debt distress. Financial markets could also suddenly reprice in response to adverse inflation news, while further geopolitical fragmentation could hamper economic progress.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "The balance of risks remains tilted to the downside, but adverse risks have moderated since the October 2022 WEO. 
On the upside, a stronger boost from pent-up demand in numerous economies or a faster fall in inflation are plausible. On the downside, severe health outcomes in China could hold back the recovery, Russia’s war in Ukraine could escalate, and tighter global financing conditions could worsen debt distress. Financial markets could also suddenly reprice in response to adverse inflation news, while further geopolitical fragmentation could hamper economic progress.", - "type": "ListItem" + ] + } + } }, { + "type": "ListItem", "element_id": "5aaef92421bbff28438f130639bb9ad4", + "text": "In most economies, amid the cost-of-living crisis, the priority remains achieving sustained disinflation. With tighter monetary conditions and lower growth potentially affecting financial and debt stability, it is necessary to deploy macroprudential tools and strengthen debt restructuring frameworks. Accelerating COVID-19 vaccinations in China would safeguard the recovery, with positive cross-border spillovers. Fiscal support should be better targeted at those most affected by elevated food and energy prices, and broad-based fiscal relief measures should be withdrawn. 
Stronger multilateral cooperation is essential to preserve the gains from the rules-based multilateral system and to mitigate climate change by limiting emissions and raising green investment.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "In most economies, amid the cost-of-living crisis, the priority remains achieving sustained disinflation. With tighter monetary conditions and lower growth potentially affecting financial and debt stability, it is necessary to deploy macroprudential tools and strengthen debt restructuring frameworks. Accelerating COVID-19 vaccinations in China would safeguard the recovery, with positive cross-border spillovers. Fiscal support should be better targeted at those most affected by elevated food and energy prices, and broad-based fiscal relief measures should be withdrawn. 
Stronger multilateral cooperation is essential to preserve the gains from the rules-based multilateral system and to mitigate climate change by limiting emissions and raising green investment.", - "type": "ListItem" + ] + } + } }, { + "type": "NarrativeText", "element_id": "59a0f3668027e1de59d41c3e44e8c706", + "text": "The global fight against inflation, Russia\u2019s war in Ukraine, and a resurgence of COVID-19 in China weighed on global economic activity in 2022, and the first two factors will continue to do so in 2023.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "The global fight against inflation, Russia’s war in Ukraine, and a resurgence of COVID-19 in China weighed on global economic activity in 2022, and the first two factors will continue to do so in 2023.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "65937c6c360b320fa5c45344fd86bc42", + "text": "Despite these headwinds, real GDP was surprisingly strong in the third quarter of 2022 in numerous economies, including the United States, the euro area, and major emerging market and developing economies. The sources of these surprises were in many cases domestic: stronger-than-expected private consumption and investment amid tight labor markets and greater-than-anticipated fiscal support. Households spent more to satisfy pent-up demand, particularly on services, partly by drawing down their stock of savings as economies reopened. Business investment rose to meet demand. 
On the supply side, easing bottlenecks and declining transportation costs reduced pressures on input prices and allowed for a rebound in previously constrained sectors, such as motor vehicles. Energy markets have adjusted faster than expected to the shock from Russia\u2019s invasion of Ukraine.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "Despite these headwinds, real GDP was surprisingly strong in the third quarter of 2022 in numerous economies, including the United States, the euro area, and major emerging market and developing economies. The sources of these surprises were in many cases domestic: stronger-than-expected private consumption and investment amid tight labor markets and greater-than-anticipated fiscal support. Households spent more to satisfy pent-up demand, particularly on services, partly by drawing down their stock of savings as economies reopened. Business investment rose to meet demand. On the supply side, easing bottlenecks and declining transportation costs reduced pressures on input prices and allowed for a rebound in previously constrained sectors, such as motor vehicles. Energy markets have adjusted faster than expected to the shock from Russia’s invasion of Ukraine.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "c4c506d0a0d76bff337132fc5694b047", + "text": "In the fourth quarter of 2022, however, this uptick is estimated to have faded in most\u2014though not all\u2013\u2013major economies. 
US growth remains stronger than expected, with consumers continuing to spend from their stock of savings (the personal saving rate is at its lowest in more than 60 years, except for July 2005), unemployment near historic lows, and plentiful job opportunities. But elsewhere, high-frequency activity indicators (such as business and consumer sentiment, purchasing manager surveys, and mobility indicators) generally point to a slowdown.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "In the fourth quarter of 2022, however, this uptick is estimated to have faded in most—though not all––major economies. US growth remains stronger than expected, with consumers continuing to spend from their stock of savings (the personal saving rate is at its lowest in more than 60 years, except for July 2005), unemployment near historic lows, and plentiful job opportunities. But elsewhere, high-frequency activity indicators (such as business and consumer sentiment, purchasing manager surveys, and mobility indicators) generally point to a slowdown.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "5eddf931370d9dc4f44c74d11fef7235", + "text": "COVID-19 deepens China\u2019s slowdown. Economic activity in China slowed in the fourth quarter amid multiple large COVID-19 outbreaks in Beijing and other densely populated localities. 
Renewed lockdowns accompanied the outbreaks until the relaxation of COVID-19 restrictions in November and December, which paved the way for a full reopening. Real estate investment continued to contract, and developer restructuring is proceeding slowly, amid the lingering property market crisis. Developers have yet to deliver on a large backlog of presold housing, and downward pressure is building on house prices (so far limited by home price floors). The authorities have responded with additional monetary and fiscal policy easing, new vaccination targets for the elderly, and steps to support the completion of unfinished real estate projects. However, consumer and business sentiment remained subdued in late 2022. China\u2019s slowdown has reduced global trade growth and international commodity prices.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "COVID-19 deepens China’s slowdown. Economic activity in China slowed in the fourth quarter amid multiple large COVID-19 outbreaks in Beijing and other densely populated localities. Renewed lockdowns accompanied the outbreaks until the relaxation of COVID-19 restrictions in November and December, which paved the way for a full reopening. Real estate investment continued to contract, and developer restructuring is proceeding slowly, amid the lingering property market crisis. Developers have yet to deliver on a large backlog of presold housing, and downward pressure is building on house prices (so far limited by home price floors). 
The authorities have responded with additional monetary and fiscal policy easing, new vaccination targets for the elderly, and steps to support the completion of unfinished real estate projects. However, consumer and business sentiment remained subdued in late 2022. China’s slowdown has reduced global trade growth and international commodity prices.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "ff41d36cbf2aa336c6307362c6eda73b", + "text": "Monetary policy starts to bite. Signs are apparent that monetary policy tightening is starting to cool demand and inflation, but the full impact is unlikely to be realized before 2024. Global headline inflation appears to have peaked in the third quarter of 2022 (Figure 1). Prices of fuel and nonfuel commodities have declined, lowering headline inflation, notably in the United States, the euro area, and Latin America. But underlying (core) inflation has not yet peaked in most economies and remains well above pre-pandemic levels. It has persisted amid second-round effects from earlier cost shocks and tight labor markets with robust wage growth as consumer demand has remained resilient. Medium-term inflation expectations generally remain anchored, but some gauges are up. These developments have caused central banks to raise rates faster than expected, especially in the United States and the euro area, and to signal that rates will stay elevated for longer. Core inflation is declining in some economies that have completed their tightening cycle\u2014such as Brazil. Financial markets are displaying high sensitivity to inflation news, with equity markets rising following recent releases of lower inflation data in anticipation of interest rate cuts (Box 1), despite central banks\u2019 communicating their resolve to tighten policy further. 
With the peak in US headline inflation and an acceleration in rate hikes by several non-US central banks, the dollar has weakened since September but remains significantly stronger than a year ago.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "Monetary policy starts to bite. Signs are apparent that monetary policy tightening is starting to cool demand and inflation, but the full impact is unlikely to be realized before 2024. Global headline inflation appears to have peaked in the third quarter of 2022 (Figure 1). Prices of fuel and nonfuel commodities have declined, lowering headline inflation, notably in the United States, the euro area, and Latin America. But underlying (core) inflation has not yet peaked in most economies and remains well above pre-pandemic levels. It has persisted amid second-round effects from earlier cost shocks and tight labor markets with robust wage growth as consumer demand has remained resilient. Medium-term inflation expectations generally remain anchored, but some gauges are up. These developments have caused central banks to raise rates faster than expected, especially in the United States and the euro area, and to signal that rates will stay elevated for longer. Core inflation is declining in some economies that have completed their tightening cycle—such as Brazil. 
Financial markets are displaying high sensitivity to inflation news, with equity markets rising following recent releases of lower inflation data in anticipation of interest rate cuts (Box 1), despite central banks’ communicating their resolve to tighten policy further. With the peak in US headline inflation and an acceleration in rate hikes by several non-US central banks, the dollar has weakened since September but remains significantly stronger than a year ago.", - "type": "NarrativeText" + ] + } + } }, { + "type": "Title", "element_id": "8d11a0a100fbb2971f88abf9b65c6f6f", + "text": "Figure 1. Twin Peaks? Headline and Core Inflation (Percent, year over year)", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "Figure 1. Twin Peaks? 
Headline and Core Inflation (Percent, year over year)", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "a9f244787beaed9cee9487192a9a595f", + "text": "United States", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "United States", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "dcf558e8bb6cab51a0d352d807d60f16", + "text": "Median country Brazil", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "Median country Brazil", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "cdd95d6fc1603d1c87d82ef501854019", + "text": "Euro area", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "Euro area", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "e8583c536368cc6c911b43e5143e25c9", + "text": "18 16 14 12 10 8 6 4 2 0 \u20132", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "18 16 14 12 10 8 6 4 2 0 –2", - "type": "UncategorizedText" + ] + } + } }, { + "type": "ListItem", "element_id": "615c24c673403094c22f130a46a530a5", + "text": "1. Headline Inflation", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "1. Headline Inflation", - "type": "ListItem" + ] + } + } }, { + "type": "Title", "element_id": "9f5cfa9834b1d2ff3c5521a7184cd340", + "text": "Jul. 
21", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "Jul. 21", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "9957f73d3de7d70b81454adbc9f8e32f", + "text": "Jan. 21", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "Jan. 21", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "8bab60d3645cdba3931ec74ece3630a7", + "text": "Jul. 20", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "Jul. 
20", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "6fed287b1c9221b472f5312786c67849", + "text": "Jan. 20", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "Jan. 20", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "b06114dfb73145f7f7272913ac24e9d5", + "text": "Jan. 22", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "Jan. 22", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "8f126b0ef94308f6487c821277d3709b", + "text": "Jan. 
2019", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "Jan. 2019", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "cae8bf06d9195af03b1e63ddb8fde903", + "text": "Jul. 19", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "Jul. 
19", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "f24f7eca9e5579162877451819b51872", + "text": "16 14 12 10 8 6 4 2 0", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "16 14 12 10 8 6 4 2 0", - "type": "UncategorizedText" + ] + } + } }, { + "type": "ListItem", "element_id": "ca9b8fba67ab4ef8775b93c44cefe49f", + "text": "2. Core Inflation", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "2. 
Core Inflation", - "type": "ListItem" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "daf6be19231dcf542cf85401482fb741", + "text": "\u20132", "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 3 - }, - "text": "–2", - "type": "UncategorizedText" + "page_number": 3, + "filetype": "application/pdf", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } }, { + "type": "UncategorizedText", "element_id": "c5a631b344d8d0c236fa27a7a8a65164", + "text": "Jan. 2019", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "Jan. 2019", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "9ee4179ed9bf366ff8b72f1308bf8be5", + "text": "Jan. 
20", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "Jan. 20", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "3b08be006371ec383cef269c5386c1bf", + "text": "Jul. 20", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "Jul. 20", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "51e0969ceab966b017d2e4f117cb96bc", + "text": "Jan. 21", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "Jan. 
21", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "651a46d412cb4526f276a34d4feffe23", + "text": "Jul. 21", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "Jul. 21", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "799a0349babed3849e4459f956ac682a", + "text": "Jan. 22", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "Jan. 22", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "01b53e01707c976d8678b1f9248d65c3", + "text": "Jul. 
19", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "Jul. 19", - "type": "Title" + ] + } + } }, { + "type": "NarrativeText", "element_id": "6d7e9fcf6feed7e9c8327587aeb582c4", + "text": "Sources: Haver Analytics; and IMF staff calculations. Note: The figure shows the developments in headline and core inflation across 18 advanced economies and 17 emerging market and developing economies. Core inflation is the change in prices for goods and services, but excluding those for food and energy (or the closest available measure). For the euro area (and other European countries for which the data are available), energy, food, alcohol, and tobacco are excluded. The gray bands depict the 10th to 90th percentiles of inflation across economies.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "Sources: Haver Analytics; and IMF staff calculations. Note: The figure shows the developments in headline and core inflation across 18 advanced economies and 17 emerging market and developing economies. 
Core inflation is the change in prices for goods and services, but excluding those for food and energy (or the closest available measure). For the euro area (and other European countries for which the data are available), energy, food, alcohol, and tobacco are excluded. The gray bands depict the 10th to 90th percentiles of inflation across economies.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "0a1364706cc753cfc4b55c80040952e4", + "text": "Winter comes to Europe. European economic growth in 2022 was more resilient than expected in the face of the large negative terms-of-trade shock from the war in Ukraine. This resilience\u2013\u2013which is", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "Winter comes to Europe. European economic growth in 2022 was more resilient than expected in the face of the large negative terms-of-trade shock from the war in Ukraine. This resilience––which is", - "type": "NarrativeText" + ] + } + } }, { + "type": "Title", "element_id": "9c34cd1bd6590df995576e48fbdea8f6", + "text": "Jul. 
22", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "Jul. 22", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "d09a9c4ce703c06e4ce559efa443e717", + "text": "Jul. 22", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "Jul. 22", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "54bdf760cbee308c8d232c9e7123aff2", + "text": "Nov. 22", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "Nov. 
22", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "96f14dcc87bd92c7f68dce67447da2f8", + "text": "Nov. 22", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "Nov. 22", - "type": "Title" + ] + } + } }, { + "type": "NarrativeText", "element_id": "d5a1e39f748c3a7e5182bf1bf788fd2e", + "text": "visible in consumption and investment data for the third quarter\u2013\u2013partly reflects government support of about 1.2 percent of European Union GDP (net budgetary cost) to households and firms hit by the energy crisis, as well as dynamism from economies reopening. Gas prices have declined by more than expected amid higher non-Russian pipeline and liquefied natural gas flows, compression of demand for gas, and a warmer-than-usual winter. However, the boost from reopening appears to be fading. High-frequency indicators for the fourth quarter suggest that the manufacturing and services sectors are contracting. Consumer confidence and business sentiment have worsened. With inflation at about 10 percent or above in several euro area countries and the United Kingdom, household budgets remain stretched. 
The accelerated pace of rate increases by the Bank of England and the European Central Bank is tightening financial conditions and cooling demand in the housing sector and beyond.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "visible in consumption and investment data for the third quarter––partly reflects government support of about 1.2 percent of European Union GDP (net budgetary cost) to households and firms hit by the energy crisis, as well as dynamism from economies reopening. Gas prices have declined by more than expected amid higher non-Russian pipeline and liquefied natural gas flows, compression of demand for gas, and a warmer-than-usual winter. However, the boost from reopening appears to be fading. High-frequency indicators for the fourth quarter suggest that the manufacturing and services sectors are contracting. Consumer confidence and business sentiment have worsened. With inflation at about 10 percent or above in several euro area countries and the United Kingdom, household budgets remain stretched. The accelerated pace of rate increases by the Bank of England and the European Central Bank is tightening financial conditions and cooling demand in the housing sector and beyond.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "dcb39d32c2ba39c7992b195265cd6a54", + "text": "Global growth, estimated at 3.4 percent in 2022, is projected to fall to 2.9 percent in 2023 before rising to 3.1 percent in 2024 (Table 1). 
Compared with the October forecast, the estimate for 2022 and the forecast for 2023 are both higher by about 0.2 percentage point, reflecting positive surprises and greater-than-expected resilience in numerous economies. Negative growth in global GDP or global GDP per capita\u2014which often happens when there is a global recession\u2014is not expected. Nevertheless, global growth projected for 2023 and 2024 is below the historical (2000\u201319) annual average of 3.8 percent.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "Global growth, estimated at 3.4 percent in 2022, is projected to fall to 2.9 percent in 2023 before rising to 3.1 percent in 2024 (Table 1). Compared with the October forecast, the estimate for 2022 and the forecast for 2023 are both higher by about 0.2 percentage point, reflecting positive surprises and greater-than-expected resilience in numerous economies. Negative growth in global GDP or global GDP per capita—which often happens when there is a global recession—is not expected. Nevertheless, global growth projected for 2023 and 2024 is below the historical (2000–19) annual average of 3.8 percent.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "81741aab9bfa1b9a8a65801895eb798c", + "text": "The forecast of low growth in 2023 reflects the rise in central bank rates to fight inflation\u2013\u2013 especially in advanced economies\u2013\u2013as well as the war in Ukraine. 
The decline in growth in 2023 from 2022 is driven by advanced economies; in emerging market and developing economies, growth is estimated to have bottomed out in 2022. Growth is expected to pick up in China with the full reopening in 2023. The expected pickup in 2024 in both groups of economies reflects gradual recovery from the effects of the war in Ukraine and subsiding inflation. Following the path of global demand, world trade growth is expected to decline in 2023 to 2.4 percent, despite an easing of supply bottlenecks, before rising to 3.4 percent in 2024.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "The forecast of low growth in 2023 reflects the rise in central bank rates to fight inflation–– especially in advanced economies––as well as the war in Ukraine. The decline in growth in 2023 from 2022 is driven by advanced economies; in emerging market and developing economies, growth is estimated to have bottomed out in 2022. Growth is expected to pick up in China with the full reopening in 2023. The expected pickup in 2024 in both groups of economies reflects gradual recovery from the effects of the war in Ukraine and subsiding inflation. 
Following the path of global demand, world trade growth is expected to decline in 2023 to 2.4 percent, despite an easing of supply bottlenecks, before rising to 3.4 percent in 2024.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "aaedb0d3f5b2389c5b245d23220784ea", + "text": "These forecasts are based on a number of assumptions, including on fuel and nonfuel commodity prices, which have generally been revised down since October, and on interest rates, which have been revised up. In 2023, oil prices are projected to fall by about 16 percent, while nonfuel commodity prices are expected to fall by, on average, 6.3 percent. Global interest rate assumptions are revised up, reflecting intensified actual and signaled policy tightening by major central banks since October.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "These forecasts are based on a number of assumptions, including on fuel and nonfuel commodity prices, which have generally been revised down since October, and on interest rates, which have been revised up. In 2023, oil prices are projected to fall by about 16 percent, while nonfuel commodity prices are expected to fall by, on average, 6.3 percent. 
Global interest rate assumptions are revised up, reflecting intensified actual and signaled policy tightening by major central banks since October.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "2cab8e041796d4eb82effb911ab19658", + "text": "For advanced economies, growth is projected to decline sharply from 2.7 percent in 2022 to 1.2 percent in 2023 before rising to 1.4 percent in 2024, with a downward revision of 0.2 percentage point for 2024. About 90 percent of advanced economies are projected to see a decline in growth in 2023.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "For advanced economies, growth is projected to decline sharply from 2.7 percent in 2022 to 1.2 percent in 2023 before rising to 1.4 percent in 2024, with a downward revision of 0.2 percentage point for 2024. 
About 90 percent of advanced economies are projected to see a decline in growth in 2023.", - "type": "NarrativeText" + ] + } + } }, { + "type": "ListItem", "element_id": "f173a9623b74a6662d8a5009a1e748c5", + "text": "", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "", - "type": "ListItem" + ] + } + } }, { + "type": "NarrativeText", "element_id": "f9a4ddf6effe8c0aa38b33e4e156fffe", + "text": "In the United States, growth is projected to fall from 2.0 percent in 2022 to 1.4 percent in 2023 and 1.0 percent in 2024. With growth rebounding in the second half of 2024, growth in 2024 will be faster than in 2023 on a fourth-quarter-over-fourth-quarter basis, as in most advanced", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "In the United States, growth is projected to fall from 2.0 percent in 2022 to 1.4 percent in 2023 and 1.0 percent in 2024. 
With growth rebounding in the second half of 2024, growth in 2024 will be faster than in 2023 on a fourth-quarter-over-fourth-quarter basis, as in most advanced", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "f6994b00e921e0df3e1cccb7d24b440b", + "text": "economies. There is a 0.4 percentage point upward revision for annual growth in 2023, reflecting carryover effects from domestic demand resilience in 2022, but a 0.2 percentage point downward revision of growth in 2024 due to the steeper path of Federal Reserve rate hikes, to a peak of about 5.1 percent in 2023.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 5, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "economies. There is a 0.4 percentage point upward revision for annual growth in 2023, reflecting carryover effects from domestic demand resilience in 2022, but a 0.2 percentage point downward revision of growth in 2024 due to the steeper path of Federal Reserve rate hikes, to a peak of about 5.1 percent in 2023.", - "type": "NarrativeText" + ] + } + } }, { + "type": "ListItem", "element_id": "b05cf1dcd98e261babc974c759f2d255", + "text": "Growth in the euro area is projected to bottom out at 0.7 percent in 2023 before rising to 1.6 percent in 2024. 
The 0.2 percentage point upward revision to the forecast for 2023 reflects the effects of faster rate hikes by the European Central Bank and eroding real incomes, offset by the carryover from the 2022 outturn, lower wholesale energy prices, and additional announcements of fiscal purchasing power support in the form of energy price controls and cash transfers.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 5, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "Growth in the euro area is projected to bottom out at 0.7 percent in 2023 before rising to 1.6 percent in 2024. 
The 0.2 percentage point upward revision to the forecast for 2023 reflects the effects of faster rate hikes by the European Central Bank and eroding real incomes, offset by the carryover from the 2022 outturn, lower wholesale energy prices, and additional announcements of fiscal purchasing power support in the form of energy price controls and cash transfers.", - "type": "ListItem" + ] + } + } }, { + "type": "ListItem", "element_id": "1d8df1715fe00d1fa5ee485b589c378d", + "text": "Growth in the United Kingdom is projected to be \u20130.6 percent in 2023, a 0.9 percentage point downward revision from October, reflecting tighter fiscal and monetary policies and financial conditions and still-high energy retail prices weighing on household budgets.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 5, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "Growth in the United Kingdom is projected to be –0.6 percent in 2023, a 0.9 percentage point downward revision from October, reflecting tighter fiscal and monetary policies and financial conditions and still-high energy retail prices weighing on household budgets.", - "type": "ListItem" + ] + } + } }, { + "type": "ListItem", "element_id": "24208d35a2ec201b4c8cb6632bbfddb0", + "text": "Growth in Japan is projected to rise to 1.8 percent in 2023, with continued monetary and fiscal policy support. High corporate profits from a depreciated yen and earlier delays in implementing previous projects will support business investment. 
In 2024, growth is expected to decline to 0.9 percent as the effects of past stimulus dissipate.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 5, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "Growth in Japan is projected to rise to 1.8 percent in 2023, with continued monetary and fiscal policy support. High corporate profits from a depreciated yen and earlier delays in implementing previous projects will support business investment. In 2024, growth is expected to decline to 0.9 percent as the effects of past stimulus dissipate.", - "type": "ListItem" + ] + } + } }, { + "type": "NarrativeText", "element_id": "0687304d8d35a8b96a4e5f3538eebb15", + "text": "For emerging market and developing economies, growth is projected to rise modestly, from 3.9 percent in 2022 to 4.0 percent in 2023 and 4.2 percent in 2024, with an upward revision of 0.3 percentage point for 2023 and a downward revision of 0.1 percentage point for 2024. 
About half of emerging market and developing economies have lower growth in 2023 than in 2022.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 5, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "For emerging market and developing economies, growth is projected to rise modestly, from 3.9 percent in 2022 to 4.0 percent in 2023 and 4.2 percent in 2024, with an upward revision of 0.3 percentage point for 2023 and a downward revision of 0.1 percentage point for 2024. About half of emerging market and developing economies have lower growth in 2023 than in 2022.", - "type": "NarrativeText" + ] + } + } }, { + "type": "ListItem", "element_id": "e2c936ece69985b9479096639d18856a", + "text": "Growth in emerging and developing Asia is expected to rise in 2023 and 2024 to 5.3 percent and 5.2 percent, respectively, after the deeper-than-expected slowdown in 2022 to 4.3 percent attributable to China\u2019s economy. China\u2019s real GDP slowdown in the fourth quarter of 2022 implies a 0.2 percentage point downgrade for 2022 growth to 3.0 percent\u2014the first time in more than 40 years with China\u2019s growth below the global average. Growth in China is projected to rise to 5.2 percent in 2023, reflecting rapidly improving mobility, and to fall to 4.5 percent in 2024 before settling at below 4 percent over the medium term amid declining business dynamism and slow progress on structural reforms. 
Growth in India is set to decline from 6.8 percent in 2022 to 6.1 percent in 2023 before picking up to 6.8 percent in 2024, with resilient domestic demand despite external headwinds. Growth in the ASEAN-5 countries (Indonesia, Malaysia, Philippines, Singapore, Thailand) is similarly projected to slow to 4.3 percent in 2023 and then pick up to 4.7 percent in 2024.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 5, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "Growth in emerging and developing Asia is expected to rise in 2023 and 2024 to 5.3 percent and 5.2 percent, respectively, after the deeper-than-expected slowdown in 2022 to 4.3 percent attributable to China’s economy. China’s real GDP slowdown in the fourth quarter of 2022 implies a 0.2 percentage point downgrade for 2022 growth to 3.0 percent—the first time in more than 40 years with China’s growth below the global average. Growth in China is projected to rise to 5.2 percent in 2023, reflecting rapidly improving mobility, and to fall to 4.5 percent in 2024 before settling at below 4 percent over the medium term amid declining business dynamism and slow progress on structural reforms. Growth in India is set to decline from 6.8 percent in 2022 to 6.1 percent in 2023 before picking up to 6.8 percent in 2024, with resilient domestic demand despite external headwinds. 
Growth in the ASEAN-5 countries (Indonesia, Malaysia, Philippines, Singapore, Thailand) is similarly projected to slow to 4.3 percent in 2023 and then pick up to 4.7 percent in 2024.", - "type": "ListItem" + ] + } + } }, { + "type": "ListItem", "element_id": "48660631565d41001ceb5e5a1d089971", + "text": "Growth in emerging and developing Europe is projected to have bottomed out in 2022 at 0.7 percent and, since the October forecast, has been revised up for 2023 by 0.9 percentage point to 1.5 percent. This reflects a smaller economic contraction in Russia in 2022 (estimated at \u20132.2 percent compared with a predicted \u20133.4 percent) followed by modestly positive growth in 2023. At the current oil price cap level of the Group of Seven, Russian crude oil export volumes are not expected to be significantly affected, with Russian trade continuing to be redirected from sanctioning to non-sanctioning countries. In Latin America and the Caribbean, growth is projected to decline from 3.9 percent in 2022 to 1.8 percent in 2023, with an upward revision for 2023 of 0.1 percentage point since October. 
The forecast revision reflects upgrades of 0.2 percentage point for Brazil and 0.5 percentage point for Mexico due to unexpected domestic demand resilience, higher-than-expected growth in", "metadata": { + "languages": [ + "eng" + ], + "page_number": 5, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "Growth in emerging and developing Europe is projected to have bottomed out in 2022 at 0.7 percent and, since the October forecast, has been revised up for 2023 by 0.9 percentage point to 1.5 percent. This reflects a smaller economic contraction in Russia in 2022 (estimated at –2.2 percent compared with a predicted –3.4 percent) followed by modestly positive growth in 2023. At the current oil price cap level of the Group of Seven, Russian crude oil export volumes are not expected to be significantly affected, with Russian trade continuing to be redirected from sanctioning to non-sanctioning countries. In Latin America and the Caribbean, growth is projected to decline from 3.9 percent in 2022 to 1.8 percent in 2023, with an upward revision for 2023 of 0.1 percentage point since October. 
The forecast revision reflects upgrades of 0.2 percentage point for Brazil and 0.5 percentage point for Mexico due to unexpected domestic demand resilience, higher-than-expected growth in", - "type": "ListItem" + ] + } + } }, { + "type": "ListItem", "element_id": "6d152b133a22fa74cba37f141f13ac46", + "text": "", "metadata": { + "languages": [ + "eng" + ], + "page_number": 5, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "", - "type": "ListItem" + ] + } + } }, { + "type": "NarrativeText", "element_id": "36805b6b9815e8102abcac6c34121d09", + "text": "major trading partner economies, and in Brazil, greater-than-expected fiscal support. Growth in the region is projected to rise to 2.1 percent in 2024, although with a downward revision of 0.3 percentage point, reflecting tighter financial conditions, lower prices of exported commodities, and downward revisions to trading partner growth.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 6, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 6 - }, - "text": "major trading partner economies, and in Brazil, greater-than-expected fiscal support. 
Growth in the region is projected to rise to 2.1 percent in 2024, although with a downward revision of 0.3 percentage point, reflecting tighter financial conditions, lower prices of exported commodities, and downward revisions to trading partner growth.", - "type": "NarrativeText" + ] + } + } }, { + "type": "ListItem", "element_id": "5507388d3c1cb86078ee8c633c1ae322", + "text": "Growth in the Middle East and Central Asia is projected to decline from 5.3 percent in 2022 to 3.2 percent in 2023, with a downward revision of 0.4 percentage point since October, mainly attributable to a steeper-than-expected growth slowdown in Saudi Arabia, from 8.7 percent in 2022 (which was stronger than expected by 1.1 percentage points) to 2.6 percent in 2023, with a negative revision of 1.1 percentage points. The downgrade for 2023 reflects mainly lower oil production in line with an agreement through OPEC+ (Organization of the Petroleum Exporting Countries, including Russia and other non-OPEC oil exporters), while non-oil growth is expected to remain robust. In sub-Saharan Africa, growth is projected to remain moderate at 3.8 percent in 2023 amid prolonged fallout from the COVID-19 pandemic, although with a modest upward revision since October, before picking up to 4.1 percent in 2024. The small upward revision for 2023 (0.1 percentage point) reflects Nigeria\u2019s rising growth in 2023 due to measures to address insecurity issues in the oil sector. 
In South Africa, by contrast, after a COVID-19 reopening rebound in 2022, projected growth more than halves in 2023, to 1.2 percent, reflecting weaker external demand, power shortages, and structural constraints.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 6, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 6 - }, - "text": "Growth in the Middle East and Central Asia is projected to decline from 5.3 percent in 2022 to 3.2 percent in 2023, with a downward revision of 0.4 percentage point since October, mainly attributable to a steeper-than-expected growth slowdown in Saudi Arabia, from 8.7 percent in 2022 (which was stronger than expected by 1.1 percentage points) to 2.6 percent in 2023, with a negative revision of 1.1 percentage points. The downgrade for 2023 reflects mainly lower oil production in line with an agreement through OPEC+ (Organization of the Petroleum Exporting Countries, including Russia and other non-OPEC oil exporters), while non-oil growth is expected to remain robust. In sub-Saharan Africa, growth is projected to remain moderate at 3.8 percent in 2023 amid prolonged fallout from the COVID-19 pandemic, although with a modest upward revision since October, before picking up to 4.1 percent in 2024. The small upward revision for 2023 (0.1 percentage point) reflects Nigeria’s rising growth in 2023 due to measures to address insecurity issues in the oil sector. 
In South Africa, by contrast, after a COVID-19 reopening rebound in 2022, projected growth more than halves in 2023, to 1.2 percent, reflecting weaker external demand, power shortages, and structural constraints.", - "type": "ListItem" + ] + } + } }, { + "type": "ListItem", "element_id": "09d43a736c7d45a81461d001dab9f7d8", + "text": "", "metadata": { + "languages": [ + "eng" + ], + "page_number": 6, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 6 - }, - "text": "", - "type": "ListItem" + ] + } + } }, { + "type": "NarrativeText", "element_id": "1dd483d0ba53cd4cc9e8da6f99852eea", + "text": "About 84 percent of countries are expected to have lower headline (consumer price index) inflation in 2023 than in 2022. Global inflation is set to fall from 8.8 percent in 2022 (annual average) to 6.6 percent in 2023 and 4.3 percent in 2024\u2013\u2013above pre-pandemic (2017\u201319) levels of about 3.5 percent. The projected disinflation partly reflects declining international fuel and nonfuel commodity prices due to weaker global demand. It also reflects the cooling effects of monetary policy tightening on underlying (core) inflation, which globally is expected to decline from 6.9 percent in the fourth quarter of 2022 (year over year) to 4.5 percent by the fourth quarter of 2023. 
Still, disinflation will take time: by 2024, projected annual average headline and core inflation will, respectively, still be above pre-pandemic levels in 82 percent and 86 percent of economies.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 6, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 6 - }, - "text": "About 84 percent of countries are expected to have lower headline (consumer price index) inflation in 2023 than in 2022. Global inflation is set to fall from 8.8 percent in 2022 (annual average) to 6.6 percent in 2023 and 4.3 percent in 2024––above pre-pandemic (2017–19) levels of about 3.5 percent. The projected disinflation partly reflects declining international fuel and nonfuel commodity prices due to weaker global demand. It also reflects the cooling effects of monetary policy tightening on underlying (core) inflation, which globally is expected to decline from 6.9 percent in the fourth quarter of 2022 (year over year) to 4.5 percent by the fourth quarter of 2023. Still, disinflation will take time: by 2024, projected annual average headline and core inflation will, respectively, still be above pre-pandemic levels in 82 percent and 86 percent of economies.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "2f7bf751a923d44ba6998c73bc67575e", + "text": "In advanced economies, annual average inflation is projected to decline from 7.3 percent in 2022 to 4.6 percent in 2023 and 2.6 percent in 2024\u2013\u2013above target in several cases. 
In emerging market and developing economies, projected annual inflation declines from 9.9 percent in 2022 to 8.1 percent in 2023 and 5.5 percent in 2024, above the 4.9 percent pre-pandemic (2017\u201319) average. In low-income developing countries, inflation is projected to moderate from 14.2 percent in 2022 to 8.6 percent in 2024\u2013\u2013still high, but close to the pre-pandemic average.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 6, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 6 - }, - "text": "In advanced economies, annual average inflation is projected to decline from 7.3 percent in 2022 to 4.6 percent in 2023 and 2.6 percent in 2024––above target in several cases. In emerging market and developing economies, projected annual inflation declines from 9.9 percent in 2022 to 8.1 percent in 2023 and 5.5 percent in 2024, above the 4.9 percent pre-pandemic (2017–19) average. 
In low-income developing countries, inflation is projected to moderate from 14.2 percent in 2022 to 8.6 percent in 2024––still high, but close to the pre-pandemic average.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "f7664a0efb897ec1e366c7c4ea1da725", + "text": "The balance of risks to the global outlook remains tilted to the downside, with scope for lower growth and higher inflation, but adverse risks have moderated since the October 2022 World Economic Outlook.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 6, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 6 - }, - "text": "The balance of risks to the global outlook remains tilted to the downside, with scope for lower growth and higher inflation, but adverse risks have moderated since the October 2022 World Economic Outlook.", - "type": "NarrativeText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "3cfa1edc1765b010559a286930405900", + "text": "Table 1. 
Overview of the World Economic Outlook Projections (Percent change, unless noted otherwise)", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "Table 1. Overview of the World Economic Outlook Projections (Percent change, unless noted otherwise)", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "ac077a4582b775a3b1772af3049a72df", + "text": "Year over Year", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "Year over Year", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "07e7470cba4448ab972c28ac80d22978", + "text": "Difference from October 2022", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "Difference from October 2022", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "65680b452d697d0f1d736ba73ecf49ea", + "text": "Q4 over Q4 2/", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "Q4 over Q4 2/", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "2bfd58a92cc7299bd9201c324b36bc85", + "text": "2021", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "2021", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "a8ab6b5d7be8b8dbc0b72c96896514bb", + "text": "Estimate 2022", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "Estimate 2022", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "000bc6aed5ca41065f69e2e217802520", + "text": "Projections 2023", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "Projections 2023", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "0b0486e9998bb5cda0c10d438c0e988f", + "text": "2024", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "2024", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "50d4004c96a93b112ec042a1dce4f99f", + "text": "WEO Projections 1/", "metadata": { + "languages": [ + "eng" + ], 
+ "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "WEO Projections 1/", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "1db0c63eb836f5bba130dbd068418db8", + "text": "2023", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "2023", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "aacada1bfc82c84b135924f68c6d22c4", + "text": "2024", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "2024", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": 
"0e81224378204cf0f22b9120b2ae97fb", + "text": "Estimate 2022", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "Estimate 2022", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "048d005189e7f36478b839b22871bf5b", + "text": "Projections 2023", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "Projections 2023", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "8af8ac3ba28229268635ee444058e0be", + "text": "2024", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, 
- "text": "2024", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "377315a5da3dd3d40a2fbfba1feeb64b", + "text": "World Output", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "World Output", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "0d2b58bbf8ff6a91eb1c215f676db9db", + "text": "6.2", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "6.2", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "33c2c8e85ed31dfd51aefcad13524dc5", + "text": "3.4", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" 
- }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "3.4", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "8ab6e155a2bfe5409ba0f9f1a3c3d47a", + "text": "2.9", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "2.9", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "3aed9bf8c049e80397ed500d6ea0f234", + "text": "3.1", "metadata": { - "data_source": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "3.1", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "fa119c4a0cab19501b5202346af2f13b", + "text": "0.2", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "0.2", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "6480521b96d9d2ffa2fe738402c59d63", + "text": "\u20130.1", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "–0.1", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "d2fcc7a0c6897ce5e9d6693573bcc671", + "text": "1.9", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "1.9", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "1680839745d0b446c8de584df461430d", + "text": "3.2", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "3.2", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "6d2d3626e7a2604400eb1a3a6d9ee1ce", + "text": "3.0", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "3.0", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "e3eabb0a8958a339b9cc2d78032226c1", + "text": "Advanced Economies United States Euro Area", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "Advanced Economies United States Euro Area", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "3c5b269ca2d1717b6b479c4454272ebe", + "text": "Germany 
France Italy Spain", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "Germany France Italy Spain", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "83f84d5e51617f1a1768afde96ddba90", + "text": "Japan United Kingdom Canada Other Advanced Economies 3/", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "Japan United Kingdom Canada Other Advanced Economies 3/", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "798522462635ac3049337b0a90ed296d", + "text": "5.4 5.9 5.3 2.6 6.8 6.7 5.5 2.1 7.6 5.0 5.3", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, 
- "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "5.4 5.9 5.3 2.6 6.8 6.7 5.5 2.1 7.6 5.0 5.3", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "d53d36d9bb27e2b287fa726ca6de1a85", + "text": "2.7 2.0 3.5 1.9 2.6 3.9 5.2 1.4 4.1 3.5 2.8", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "2.7 2.0 3.5 1.9 2.6 3.9 5.2 1.4 4.1 3.5 2.8", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "7ceedce5ef888c0b36f5f45bd36a928a", + "text": "1.2 1.4 0.7 0.1 0.7 0.6 1.1 1.8 \u20130.6 1.5 2.0", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "1.2 1.4 0.7 0.1 0.7 0.6 1.1 1.8 –0.6 1.5 2.0", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "236af5ecfdca247521433c6c820e0098", + "text": "1.4 1.0 1.6 1.4 1.6 0.9 2.4 0.9 0.9 1.5 2.4", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", 
"data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "1.4 1.0 1.6 1.4 1.6 0.9 2.4 0.9 0.9 1.5 2.4", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "3469cae4d967b25911bee28e3698d7b9", + "text": "0.1 0.4 0.2 0.4 0.0 0.8 \u20130.1 0.2 \u20130.9 0.0 \u20130.3", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "0.1 0.4 0.2 0.4 0.0 0.8 –0.1 0.2 –0.9 0.0 –0.3", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "1b0ca852ede5328ca44f16e354dcb334", + "text": "\u20130.2 \u20130.2 \u20130.2 \u20130.1 0.0 \u20130.4 \u20130.2 \u20130.4 0.3 \u20130.1 \u20130.2", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": 
"application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "–0.2 –0.2 –0.2 –0.1 0.0 –0.4 –0.2 –0.4 0.3 –0.1 –0.2", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "35d3a343f97410defef5fab7c93fa657", + "text": "1.3 0.7 1.9 1.4 0.5 2.1 2.1 1.7 0.4 2.3 1.4", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "1.3 0.7 1.9 1.4 0.5 2.1 2.1 1.7 0.4 2.3 1.4", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "b882159579a2b2233ed3ad25b573e8ec", + "text": "1.1 1.0 0.5 0.0 0.9 0.1 1.3 1.0 \u20130.5 1.2 2.1", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "1.1 1.0 0.5 0.0 0.9 0.1 1.3 1.0 –0.5 1.2 2.1", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "fc93109818079e3012758ffcdc87d5c5", + "text": "1.6 1.3 2.1 2.3 1.8 1.0 2.8 1.0 1.8 1.9 2.2", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", 
"data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "1.6 1.3 2.1 2.3 1.8 1.0 2.8 1.0 1.8 1.9 2.2", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "0694d23dc3ff942a14a9b5f6241db2e2", + "text": "Emerging Market and Developing Economies Emerging and Developing Asia", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "Emerging Market and Developing Economies Emerging and Developing Asia", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "5ca712bd0eb074accd30b681dc8e7781", + "text": "China India 4/", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": 
"China India 4/", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "492b2770dea1a1c53e915796028e661a", + "text": "Emerging and Developing Europe", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "Emerging and Developing Europe", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "2fa274961abf0bbca6927cb7ea523458", + "text": "Russia", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "Russia", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "09475886ab80a01ba9342c36d6d8bf11", + "text": "Latin America and the Caribbean", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "Latin America and the Caribbean", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "669d677a171f799122039eb5bfd8d18e", + "text": "Brazil Mexico", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "Brazil Mexico", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "d22ccc7e325897d2fccf449add485237", + "text": "Middle East and Central Asia", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "Middle East and Central Asia", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "7703c1b0a96ee46fa7122c288fa470f5", + "text": "Saudi Arabia Sub-Saharan Africa", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "Saudi Arabia Sub-Saharan Africa", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "f43102732396bd3bc6a21c072a51d862", + "text": "Nigeria South Africa", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "Nigeria South Africa", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "07883b7e37ee2a6ff72599bf5109cc8c", + "text": "6.7 7.4 8.4 8.7 6.9 4.7 7.0 5.0 4.7 4.5 3.2 4.7 3.6 4.9", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "6.7 7.4 8.4 8.7 6.9 4.7 7.0 5.0 4.7 4.5 3.2 4.7 3.6 4.9", - "type": "UncategorizedText" + ] + } + } }, { + "type": 
"UncategorizedText", "element_id": "bfbcdc4bcfd9eccf12b9674512279eee", + "text": "3.9 4.3 3.0 6.8 0.7 \u20132.2 3.9 3.1 3.1 5.3 8.7 3.8 3.0 2.6", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "3.9 4.3 3.0 6.8 0.7 –2.2 3.9 3.1 3.1 5.3 8.7 3.8 3.0 2.6", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "ef8a8168c1db58f65d3fc933311b72f7", + "text": "4.0 5.3 5.2 6.1 1.5 0.3 1.8 1.2 1.7 3.2 2.6 3.8 3.2 1.2", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "4.0 5.3 5.2 6.1 1.5 0.3 1.8 1.2 1.7 3.2 2.6 3.8 3.2 1.2", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "7a7a89b6f8a4bc1be4846fa9ee87999e", + "text": "4.2 5.2 4.5 6.8 2.6 2.1 2.1 1.5 1.6 3.7 3.4 4.1 2.9 1.3", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "4.2 5.2 4.5 6.8 2.6 2.1 2.1 1.5 1.6 3.7 3.4 4.1 2.9 1.3", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "988703fc3f2230eca442dfcb7f5145ea", + "text": "0.3 0.4 0.8 0.0 0.9 2.6 0.1 0.2 0.5 \u20130.4 \u20131.1 0.1 0.2 0.1", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "0.3 0.4 0.8 0.0 0.9 2.6 0.1 0.2 0.5 –0.4 –1.1 0.1 0.2 0.1", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "a4550b86f83c3ef9c3c0cecde0fd70fc", + "text": "\u20130.1 0.0 0.0 0.0 0.1 0.6 \u20130.3 \u20130.4 \u20130.2 0.2 0.5 0.0 0.0 0.0", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - 
"eng" - ], - "page_number": 7 - }, - "text": "–0.1 0.0 0.0 0.0 0.1 0.6 –0.3 –0.4 –0.2 0.2 0.5 0.0 0.0 0.0", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "7e532d3986e09e7b6bf2ffe283b116a1", + "text": "2.5 3.4 2.9 4.3 \u20132.0 \u20134.1 2.6 2.8 3.7 . . . 4.6 . . . 2.6 3.0", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "2.5 3.4 2.9 4.3 –2.0 –4.1 2.6 2.8 3.7 . . . 4.6 . . . 2.6 3.0", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "d63f7320613299aae0c2108278e0982e", + "text": "5.0 6.2 5.9 7.0 3.5 1.0 1.9 0.8 1.1 . . . 2.7 . . . 3.1 0.5", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "5.0 6.2 5.9 7.0 3.5 1.0 1.9 0.8 1.1 . . . 2.7 . . . 3.1 0.5", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "8948a5f3f0480cbb4cbb3b46cdd3469f", + "text": "4.1 4.9 4.1 7.1 2.8 2.0 1.9 2.2 1.9 . . . 3.5 . . . 
2.9 1.8", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "4.1 4.9 4.1 7.1 2.8 2.0 1.9 2.2 1.9 . . . 3.5 . . . 2.9 1.8", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "e48c3a0c3b26dc80bfcec9b65c0c0bf4", + "text": "Memorandum World Growth Based on Market Exchange Rates European Union ASEAN-5 5/ Middle East and North Africa Emerging Market and Middle-Income Economies Low-Income Developing Countries", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "Memorandum World Growth Based on Market Exchange Rates European Union ASEAN-5 5/ Middle East and North Africa Emerging Market and Middle-Income Economies Low-Income Developing Countries", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "acc251938850095522dff1828a3b06e3", + "text": "6.0 5.5 3.8 4.1 7.0 4.1", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "6.0 5.5 3.8 4.1 7.0 4.1", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "1278d486545d6e04c32d302ee21a5dbb", + "text": "3.1 3.7 5.2 5.4 3.8 4.9", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "3.1 3.7 5.2 5.4 3.8 4.9", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "1552aefc5c8dcdb8705a416e64b1ed26", + "text": "2.4 0.7 4.3 3.2 4.0 4.9", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "2.4 0.7 4.3 3.2 4.0 4.9", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": 
"5e855506c048b691166fa369949c6ce8", + "text": "2.5 1.8 4.7 3.5 4.1 5.6", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "2.5 1.8 4.7 3.5 4.1 5.6", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "a64a61d997219ce3b0a2e0eae5d6b9cd", + "text": "0.3 0.0 \u20130.2 \u20130.4 0.4 0.0", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "0.3 0.0 –0.2 –0.4 0.4 0.0", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "d0fd597013f9dac098a20f2ec3b14931", + "text": "\u20130.1 \u20130.3 \u20130.2 0.2 0.0 0.1", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "–0.1 –0.3 –0.2 0.2 0.0 0.1", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "6c2dcf6d37eea9f1a9b5412ee9de0fc8", + "text": "1.7 1.8 3.7 . . . 2.5 . . .", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "1.7 1.8 3.7 . . . 2.5 . . .", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "414f00239d02b847c93d6f46f3cf38ab", + "text": "2.5 1.2 5.7 . . . 5.0 . . .", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "2.5 1.2 5.7 . . . 5.0 . . .", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "b3117a10957ae34e314bdac25bfdfbc3", + "text": "2.5 2.0 4.0 . . . 4.1 . . 
.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "2.5 2.0 4.0 . . . 4.1 . . .", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "fab1cb6025046dfdbb64814adb52c5b8", + "text": "World Trade Volume (goods and services) 6/ Advanced Economies Emerging Market and Developing Economies", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "World Trade Volume (goods and services) 6/ Advanced Economies Emerging Market and Developing Economies", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "1117840fc536fed0f26936e1b42db48a", + "text": "10.4 9.4 12.1", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "10.4 9.4 12.1", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "31bc343c61e3b2553ba96b6236d32501", + "text": "5.4 6.6 3.4", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "5.4 6.6 3.4", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "3c5bb02d01912ad71db24921afaabcce", + "text": "2.4 2.3 2.6", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "2.4 2.3 2.6", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "2adffb6f2b2fa905dc3205613c7b7e7d", + "text": "3.4 2.7 4.6", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "3.4 2.7 4.6", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "977534e3cac0d97f9ce3f7fad09f19c7", + "text": "\u20130.1 0.0 \u20130.3", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "–0.1 0.0 –0.3", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "5796d542dd188f1bea098319759d29af", + "text": "\u20130.3 \u20130.4 0.0", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "–0.3 –0.4 0.0", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": 
"901c4bf1e983db4acca49641d324e00b", + "text": ". . . . . . . . .", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": ". . . . . . . . .", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "3d468884c9e34567ff1ef8c96a604cb4", + "text": ". . . . . . . . .", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": ". . . . . . . . .", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "fad8e1cbfa1fab6b7e8946a3682e9226", + "text": ". . . . . . . . 
.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": ". . . . . . . . .", - "type": "UncategorizedText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "ba85fc9ea8b95acbd326450a13c4eb35", + "text": "Commodity Prices Oil 7/ Nonfuel (average based on world commodity import weights)", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "Commodity Prices Oil 7/ Nonfuel (average based on world commodity import weights)", - "type": "NarrativeText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "ca26d5360d833f88319340d19f6d91f9", + "text": "65.8 26.4", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "65.8 26.4", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "780d27790d408ae7a8035ce234a38005", + "text": "39.8 7.0", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "39.8 7.0", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "3aaa1339fc30800c4351b17e6e6982ce", + "text": "\u201316.2 \u20136.3", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "–16.2 –6.3", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "45dc7b4e56aa370a9377eb95fc3f2616", + "text": "\u20137.1 \u20130.4", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "–7.1 –0.4", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "bf47674c5311dabb4eee6181417ce38e", + "text": "\u20133.3 \u20130.1", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "–3.3 –0.1", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "9271401c7032ff585e56c6f8bde4c5bc", + "text": "\u20130.9 0.3", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "–0.9 0.3", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "b2098d746938179abd97aaa69077a3aa", + "text": "11.2 
\u20132.0", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "11.2 –2.0", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "4d5bacccc818e9e1d56bdde744ae6197", + "text": "\u20139.8 1.4", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "–9.8 1.4", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "e06eda1b357c1dcbbd6a5565f6e0c0d7", + "text": "\u20135.9 \u20130.2", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "–5.9 
–0.2", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "3d2031a36227cd1e6f16dba4b83c285b", + "text": "World Consumer Prices 8/ Advanced Economies 9/ Emerging Market and Developing Economies 8/", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "World Consumer Prices 8/ Advanced Economies 9/ Emerging Market and Developing Economies 8/", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "704d3ef8dad84ee54620a7500e273b70", + "text": "4.7 3.1 5.9", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "4.7 3.1 5.9", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "993388c0e948c3bddbaf9691afd92838", + "text": "8.8 7.3 9.9", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "8.8 7.3 9.9", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "154d3131ad7e9d06dbfa40228665a61e", + "text": "6.6 4.6 8.1", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "6.6 4.6 8.1", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "eb26885af062db4d802565d428d76998", + "text": "4.3 2.6 5.5", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "4.3 2.6 5.5", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "8fd90392a9b363d37292b12798a93491", + "text": "0.1 0.2 0.0", 
"metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "0.1 0.2 0.0", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "6a1bd6b1143e5c3e3c0ea3db5ef33ae4", + "text": "0.2 0.2 0.2", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "0.2 0.2 0.2", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "e42338b80e2363209d554914579623d7", + "text": "9.2 7.8 10.4", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "9.2 7.8 10.4", - 
"type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "23045d4af2645b4a93641cc13c172dd4", + "text": "5.0 3.1 6.6", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "5.0 3.1 6.6", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "402936a0a1998b53c0b54d96c034a9a6", + "text": "3.5 2.3 4.5", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "3.5 2.3 4.5", - "type": "UncategorizedText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "dc5e5f30cd70d800897a30bd4bd11e5a", + "text": "Note: Real effective exchange rates are assumed to remain constant at the levels prevailing during October 26, 2022--November 23, 2022. Economies are listed on the basis of economic size. The aggregated quarterly data are seasonally adjusted. WEO = World Economic Outlook. 1/ Difference based on rounded figures for the current and October 2022 WEO forecasts. 
Countries whose forecasts have been updated relative to October 2022 WEO forecasts account for approximately 90 percent of world GDP measured at purchasing-power-parity weights. 2/ For World Output (Emerging Market and Developing Economies), the quarterly estimates and projections account for approximately 90 percent (80 percent) of annual world (emerging market and developing economies') output at purchasing-power-parity weights. 3/ Excludes the Group of Seven (Canada, France, Germany, Italy, Japan, United Kingdom, United States) and euro area countries. 4/ For India, data and projections are presented on a fiscal year basis, with FY 2022/23 (starting in April 2022) shown in the 2022 column. India's growth projections are 5.4 percent in 2023 and 6.8 percent in 2024 based on calendar year. 5/ Indonesia, Malaysia, Philippines, Singapore, Thailand. 6/ Simple average of growth rates for export and import volumes (goods and services). 7/ Simple average of prices of UK Brent, Dubai Fateh, and West Texas Intermediate crude oil. The average assumed price of oil in US dollars a barrel, based on futures markets (as of November 29, 2022), is $81.13 in 2023 and $75.36 in 2024. 8/ Excludes Venezuela. 
9/ The inflation rate for the euro area is 5.7% in 2023 and 3.3% in 2024, that for Japan is 2.8% in 2023 and 2.0% in 2024, and that for the United States is 4.0% in 2023 and 2.2% in 2024.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "Note: Real effective exchange rates are assumed to remain constant at the levels prevailing during October 26, 2022--November 23, 2022. Economies are listed on the basis of economic size. The aggregated quarterly data are seasonally adjusted. WEO = World Economic Outlook. 1/ Difference based on rounded figures for the current and October 2022 WEO forecasts. Countries whose forecasts have been updated relative to October 2022 WEO forecasts account for approximately 90 percent of world GDP measured at purchasing-power-parity weights. 2/ For World Output (Emerging Market and Developing Economies), the quarterly estimates and projections account for approximately 90 percent (80 percent) of annual world (emerging market and developing economies') output at purchasing-power-parity weights. 3/ Excludes the Group of Seven (Canada, France, Germany, Italy, Japan, United Kingdom, United States) and euro area countries. 4/ For India, data and projections are presented on a fiscal year basis, with FY 2022/23 (starting in April 2022) shown in the 2022 column. India's growth projections are 5.4 percent in 2023 and 6.8 percent in 2024 based on calendar year. 5/ Indonesia, Malaysia, Philippines, Singapore, Thailand. 
6/ Simple average of growth rates for export and import volumes (goods and services). 7/ Simple average of prices of UK Brent, Dubai Fateh, and West Texas Intermediate crude oil. The average assumed price of oil in US dollars a barrel, based on futures markets (as of November 29, 2022), is $81.13 in 2023 and $75.36 in 2024. 8/ Excludes Venezuela. 9/ The inflation rate for the euro area is 5.7% in 2023 and 3.3% in 2024, that for Japan is 2.8% in 2023 and 2.0% in 2024, and that for the United States is 4.0% in 2023 and 2.2% in 2024.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "6e609c51c0ad931e7bf3efc51dee09fb", + "text": "Upside risks\u2014Plausible upside risks include more favorable surprises to domestic spending\u2014as in the third quarter of 2022\u2014which, however, would increase inflation further. At the same time, there is room for an upside scenario with lower-than-expected inflation and less monetary tightening:", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "Upside risks—Plausible upside risks include more favorable surprises to domestic spending—as in the third quarter of 2022—which, however, would increase inflation further. 
At the same time, there is room for an upside scenario with lower-than-expected inflation and less monetary tightening:", - "type": "NarrativeText" + ] + } + } }, { + "type": "ListItem", "element_id": "aa80b6613fde9883fe82a94a2876d892", + "text": "Pent-up demand boost: Fueled by the stock of excess private savings from the pandemic fiscal support and, in many cases, still-tight labor markets and solid wage growth, pent-up demand remains an upside risk to the growth outlook. In some advanced economies, recent data show that households are still on net adding to their stock of excess savings (as in some euro area countries and the United Kingdom) or have ample savings left (as in the United States). This leaves scope for a further boost to consumption\u2014particularly of services, including tourism.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "Pent-up demand boost: Fueled by the stock of excess private savings from the pandemic fiscal support and, in many cases, still-tight labor markets and solid wage growth, pent-up demand remains an upside risk to the growth outlook. In some advanced economies, recent data show that households are still on net adding to their stock of excess savings (as in some euro area countries and the United Kingdom) or have ample savings left (as in the United States). 
This leaves scope for a further boost to consumption—particularly of services, including tourism.", - "type": "ListItem" + ] + } + } }, { + "type": "NarrativeText", "element_id": "29fb26c864eae1a96f25518379e328ee", + "text": "However, the boost to demand could stoke core inflation, leading to even tighter monetary policies and a stronger-than-expected slowdown later on. Pent-up demand could also fuel a stronger rebound in China.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "However, the boost to demand could stoke core inflation, leading to even tighter monetary policies and a stronger-than-expected slowdown later on. Pent-up demand could also fuel a stronger rebound in China.", - "type": "NarrativeText" + ] + } + } }, { + "type": "ListItem", "element_id": "7a37befbd8aff1ca7acba3cb792d2c84", + "text": "Faster disinflation: An easing in labor market pressures in some advanced economies due to falling vacancies could cool wage inflation without necessarily increasing unemployment. A sharp fall in the prices of goods, as consumers shift back to services, could further push down inflation. 
Such developments could imply a \u201csofter\u201d landing with less monetary tightening.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "Faster disinflation: An easing in labor market pressures in some advanced economies due to falling vacancies could cool wage inflation without necessarily increasing unemployment. A sharp fall in the prices of goods, as consumers shift back to services, could further push down inflation. Such developments could imply a “softer” landing with less monetary tightening.", - "type": "ListItem" + ] + } + } }, { + "type": "NarrativeText", "element_id": "429013c86997932584548de1417913e9", + "text": "Downside risks\u2014Numerous downside risks continue to weigh on the global outlook, lowering growth while, in a number of cases, adding further to inflation:", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "Downside risks—Numerous downside risks continue to weigh on the global outlook, lowering growth while, in a number of cases, adding further to 
inflation:", - "type": "NarrativeText" + ] + } + } }, { + "type": "ListItem", "element_id": "228ee96da0921f50e56fbe4b6c40701e", + "text": "China\u2019s recovery stalling: Amid still-low population immunity levels and insufficient hospital capacity, especially outside the major urban areas, significant health consequences could hamper the recovery. A deepening crisis in the real estate market remains a major source of vulnerability, with risks of widespread defaults by developers and resulting financial sector instability. Spillovers to the rest of the world would operate primarily through lower demand and potentially renewed supply chain problems.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "China’s recovery stalling: Amid still-low population immunity levels and insufficient hospital capacity, especially outside the major urban areas, significant health consequences could hamper the recovery. A deepening crisis in the real estate market remains a major source of vulnerability, with risks of widespread defaults by developers and resulting financial sector instability. Spillovers to the rest of the world would operate primarily through lower demand and potentially renewed supply chain problems.", - "type": "ListItem" + ] + } + } }, { + "type": "ListItem", "element_id": "b28d3dfdf6eb97d310d8dcb0814729db", + "text": "War in Ukraine escalating: An escalation of the war in Ukraine remains a major source of vulnerability, particularly for Europe and lower-income countries. 
Europe is facing lower-than- anticipated gas prices, having stored enough gas to make shortages unlikely this winter. However, refilling storage with much-diminished Russian flows will be challenging ahead of next winter, particularly if it is a very cold one and China\u2019s energy demand picks up, causing price spikes. A possible increase in food prices from a failed extension of the Black Sea grain initiative would put further pressure on lower-income countries that are experiencing food insecurity and have limited budgetary room to cushion the impact on households and businesses. With elevated food and fuel prices, social unrest may increase.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "War in Ukraine escalating: An escalation of the war in Ukraine remains a major source of vulnerability, particularly for Europe and lower-income countries. Europe is facing lower-than- anticipated gas prices, having stored enough gas to make shortages unlikely this winter. However, refilling storage with much-diminished Russian flows will be challenging ahead of next winter, particularly if it is a very cold one and China’s energy demand picks up, causing price spikes. A possible increase in food prices from a failed extension of the Black Sea grain initiative would put further pressure on lower-income countries that are experiencing food insecurity and have limited budgetary room to cushion the impact on households and businesses. 
With elevated food and fuel prices, social unrest may increase.", - "type": "ListItem" + ] + } + } }, { + "type": "ListItem", "element_id": "8fbfc5fbe7bbced6c52373e86628c0cc", + "text": "Debt distress: Since October, sovereign spreads for emerging market and developing economies have modestly declined on the back of an easing in global financial conditions (Box 1) and dollar depreciation. About 15 percent of low-income countries are estimated to be in debt distress, with an additional 45 percent at high risk of debt distress and about 25 percent of emerging market economies also at high risk. The combination of high debt levels from the pandemic, lower growth, and higher borrowing costs exacerbates the vulnerability of these economies, especially those with significant near-term dollar financing needs. Inflation persisting: Persistent labor market tightness could translate into stronger-than-expected wage growth. Higher-than-expected oil, gas, and food prices from the war in Ukraine or from a faster rebound in China\u2019s growth could again raise headline inflation and pass through into underlying inflation. Such developments could cause inflation expectations to de-anchor and require an even tighter monetary policy.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "Debt distress: Since October, sovereign spreads for emerging market and developing economies have modestly declined on the back of an easing in global financial conditions (Box 1) and dollar depreciation. 
About 15 percent of low-income countries are estimated to be in debt distress, with an additional 45 percent at high risk of debt distress and about 25 percent of emerging market economies also at high risk. The combination of high debt levels from the pandemic, lower growth, and higher borrowing costs exacerbates the vulnerability of these economies, especially those with significant near-term dollar financing needs. Inflation persisting: Persistent labor market tightness could translate into stronger-than-expected wage growth. Higher-than-expected oil, gas, and food prices from the war in Ukraine or from a faster rebound in China’s growth could again raise headline inflation and pass through into underlying inflation. Such developments could cause inflation expectations to de-anchor and require an even tighter monetary policy.", - "type": "ListItem" + ] + } + } }, { + "type": "ListItem", "element_id": "d179b859b48c68150bd63aa1ef96c3f0", + "text": "", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "", - "type": "ListItem" + ] + } + } }, { + "type": "ListItem", "element_id": "5c53b2d3f514a92cc6c099fd9a46b49a", + "text": "Sudden financial market repricing: A premature easing in financial conditions in response to lower headline inflation data could complicate anti-inflation policies and necessitate additional monetary tightening. For the same reason, unfavorable inflation data releases could trigger sudden repricing of assets and increase volatility in financial markets. 
Such movements could strain liquidity and the functioning of critical markets, with ripple effects on the real economy.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "Sudden financial market repricing: A premature easing in financial conditions in response to lower headline inflation data could complicate anti-inflation policies and necessitate additional monetary tightening. For the same reason, unfavorable inflation data releases could trigger sudden repricing of assets and increase volatility in financial markets. 
Such movements could strain liquidity and the functioning of critical markets, with ripple effects on the real economy.", - "type": "ListItem" + ] + } + } }, { + "type": "ListItem", "element_id": "389c8ab4eb91eba9123a994180a62f51", + "text": "Geopolitical fragmentation: The war in Ukraine and the related international sanctions aimed at \uf0b7 pressuring Russia to end hostilities are splitting the world economy into blocs and reinforcing earlier geopolitical tensions, such as those associated with the US-China trade dispute.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "Geopolitical fragmentation: The war in Ukraine and the related international sanctions aimed at  pressuring Russia to end hostilities are splitting the world economy into blocs and reinforcing earlier geopolitical tensions, such as those associated with the US-China trade dispute.", - "type": "ListItem" + ] + } + } }, { + "type": "NarrativeText", "element_id": "7b52d0a28d30d99ec95754dfb2942f64", + "text": "Fragmentation could intensify\u2014with more restrictions on cross-border movements of capital, workers, and international payments\u2014and could hamper multilateral cooperation on providing global public goods.1 The costs of such fragmentation are especially high in the short term, as replacing disrupted cross-border flows takes time.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 9, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 9 - }, - "text": "Fragmentation could intensify—with more restrictions on cross-border movements of capital, workers, and international payments—and could hamper multilateral cooperation on providing global public goods.1 The costs of such fragmentation are especially high in the short term, as replacing disrupted cross-border flows takes time.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "6353663e5625ee630273596a45b9e85e", + "text": "Securing global disinflation: For most economies, the priority remains achieving a sustained reduction in inflation toward target levels. Raising real policy rates and keeping them above their neutral levels until underlying inflation is clearly declining would ward off risks of inflation expectations de- anchoring. Clear central bank communication and appropriate reactions to shifts in the data will help keep inflation expectations anchored and lessen wage and price pressures. Central banks\u2019 balance sheets will need to be unwound carefully, amid market liquidity risks. Gradual and steady fiscal tightening would contribute to cooling demand and limit the burden on monetary policy in the fight against inflation. 
In countries where output remains below potential and inflation is in check, maintaining monetary and fiscal accommodation may be appropriate.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 9, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 9 - }, - "text": "Securing global disinflation: For most economies, the priority remains achieving a sustained reduction in inflation toward target levels. Raising real policy rates and keeping them above their neutral levels until underlying inflation is clearly declining would ward off risks of inflation expectations de- anchoring. Clear central bank communication and appropriate reactions to shifts in the data will help keep inflation expectations anchored and lessen wage and price pressures. Central banks’ balance sheets will need to be unwound carefully, amid market liquidity risks. Gradual and steady fiscal tightening would contribute to cooling demand and limit the burden on monetary policy in the fight against inflation. In countries where output remains below potential and inflation is in check, maintaining monetary and fiscal accommodation may be appropriate.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "34b47091a30485459353e84d55ed2a17", + "text": "Containing the reemergence of COVID-19: Addressing the ongoing pandemic requires coordinated efforts to boost vaccination and medicine access in countries where coverage remains low as well as the deployment of pandemic preparedness measures\u2014including a global push toward sequencing and sharing data. 
In China, focusing vaccination efforts on vulnerable groups and maintaining sufficiently high coverage of boosters and antiviral medicines would minimize the risks of severe health outcomes and safeguard the recovery, with favorable cross-border spillovers.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 9, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 9 - }, - "text": "Containing the reemergence of COVID-19: Addressing the ongoing pandemic requires coordinated efforts to boost vaccination and medicine access in countries where coverage remains low as well as the deployment of pandemic preparedness measures—including a global push toward sequencing and sharing data. In China, focusing vaccination efforts on vulnerable groups and maintaining sufficiently high coverage of boosters and antiviral medicines would minimize the risks of severe health outcomes and safeguard the recovery, with favorable cross-border spillovers.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "b55777af8a04f3d859bb0f01365f8a1e", + "text": "Ensuring financial stability: Depending on country circumstances, macroprudential tools can be used to tackle pockets of elevated financial sector vulnerabilities. Monitoring housing sector developments and conducting stress tests in economies where house prices have increased significantly over the past few years are warranted. 
In China, central government action to resolve the property crisis and reduce the risk of spillovers to financial stability and growth is a priority, including by strengthening temporary mechanisms to protect presale homebuyers from the risk of non-delivery and by restructuring troubled developers. Globally, financial sector regulations introduced after the global financial crisis have contributed to the resilience of banking sectors throughout the pandemic, but there is a need to address data and supervisory gaps in the less-regulated nonbank financial sector, where risks may have built up inconspicuously. Recent turmoil in the crypto space also highlights the urgent need to introduce common standards and reinforce oversight of crypto assets.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 9, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 9 - }, - "text": "Ensuring financial stability: Depending on country circumstances, macroprudential tools can be used to tackle pockets of elevated financial sector vulnerabilities. Monitoring housing sector developments and conducting stress tests in economies where house prices have increased significantly over the past few years are warranted. In China, central government action to resolve the property crisis and reduce the risk of spillovers to financial stability and growth is a priority, including by strengthening temporary mechanisms to protect presale homebuyers from the risk of non-delivery and by restructuring troubled developers. 
Globally, financial sector regulations introduced after the global financial crisis have contributed to the resilience of banking sectors throughout the pandemic, but there is a need to address data and supervisory gaps in the less-regulated nonbank financial sector, where risks may have built up inconspicuously. Recent turmoil in the crypto space also highlights the urgent need to introduce common standards and reinforce oversight of crypto assets.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "27606edb0fcb1c392c409ef1a4c75c40", + "text": "Restoring debt sustainability: Lower growth and higher borrowing costs have raised public debt ratios in several economies. Where debt is unsustainable, implementing restructuring or reprofiling early on as part of a package of reforms (including fiscal consolidation and growth-enhancing supply-side reforms) can avert the need for more disruptive adjustment later.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 9, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 9 - }, - "text": "Restoring debt sustainability: Lower growth and higher borrowing costs have raised public debt ratios in several economies. 
Where debt is unsustainable, implementing restructuring or reprofiling early on as part of a package of reforms (including fiscal consolidation and growth-enhancing supply-side reforms) can avert the need for more disruptive adjustment later.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "ded2718e79ae3f0fc5b659084b75e9f7", + "text": "Supporting the vulnerable: The surge in global energy and food prices triggered a cost-of-living crisis. Governments acted swiftly with support to households and firms, which helped cushion effects on growth and at times limited the pass-through from energy prices to headline inflation through price", "metadata": { + "languages": [ + "eng" + ], + "page_number": 9, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 9 - }, - "text": "Supporting the vulnerable: The surge in global energy and food prices triggered a cost-of-living crisis. 
Governments acted swiftly with support to households and firms, which helped cushion effects on growth and at times limited the pass-through from energy prices to headline inflation through price", - "type": "NarrativeText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "cf75e58a3006a63bb97abb22762efd9b", + "text": "1 See \u201cGeo-Economic Fragmentation and the Future of Multilateralism,\u201d IMF Staff Discussion Note 2023/001.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 9, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 9 - }, - "text": "1 See “Geo-Economic Fragmentation and the Future of Multilateralism,” IMF Staff Discussion Note 2023/001.", - "type": "UncategorizedText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "a75d15403c885115c81b35b1607561a9", + "text": "controls. The temporary and broad-based measures are becoming increasingly costly and should be withdrawn and replaced by targeted approaches. Preserving the energy price signal will encourage a reduction in energy consumption and limit the risks of shortages. Targeting can be achieved through social safety nets such as cash transfers to eligible households based on income or demographics or by transfers through electricity companies based on past energy consumption. 
Subsidies should be temporary and offset by revenue-generating measures, including one-time solidarity taxes on high- income households and companies, where appropriate.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 10, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 10 - }, - "text": "controls. The temporary and broad-based measures are becoming increasingly costly and should be withdrawn and replaced by targeted approaches. Preserving the energy price signal will encourage a reduction in energy consumption and limit the risks of shortages. Targeting can be achieved through social safety nets such as cash transfers to eligible households based on income or demographics or by transfers through electricity companies based on past energy consumption. Subsidies should be temporary and offset by revenue-generating measures, including one-time solidarity taxes on high- income households and companies, where appropriate.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "9b05ae3ba743a0e32946b507bf15483a", + "text": "Reinforcing supply: Supply-side policies could address the key structural factors impeding growth\u2014 including market power, rent seeking, rigid regulation and planning, and inefficient education\u2014and could help build resilience, reduce bottlenecks, and alleviate price pressures. 
A concerted push for investment along the supply chain of green energy technologies would bolster energy security and help advance progress on the green transition.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 10, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 10 - }, - "text": "Reinforcing supply: Supply-side policies could address the key structural factors impeding growth— including market power, rent seeking, rigid regulation and planning, and inefficient education—and could help build resilience, reduce bottlenecks, and alleviate price pressures. A concerted push for investment along the supply chain of green energy technologies would bolster energy security and help advance progress on the green transition.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "b94fac35c203ff9c99f5e9706269ad60", + "text": "Strengthening multilateral cooperation\u2014Urgent action is needed to limit the risks stemming from geopolitical fragmentation and to ensure cooperation on fundamental areas of common interest:", "metadata": { + "languages": [ + "eng" + ], + "page_number": 10, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" 
- ], - "page_number": 10 - }, - "text": "Strengthening multilateral cooperation—Urgent action is needed to limit the risks stemming from geopolitical fragmentation and to ensure cooperation on fundamental areas of common interest:", - "type": "NarrativeText" + ] + } + } }, { + "type": "ListItem", "element_id": "1a8b6e15ff17b4757b8d54e1fd34f5ff", + "text": "Restraining the pandemic: Global coordination is needed to resolve bottlenecks in the global distribution of vaccines and treatments. Public support for the development of new vaccine technologies and the design of systematic responses to future epidemics also remains essential. \uf0b7 Addressing debt distress: Progress has been made for countries that requested debt treatment under the Group of Twenty\u2019s Common Framework initiative, and more will be needed to strengthen it. It is also necessary to agree on mechanisms to resolve debt in a broader set of economies, including middle-income countries that are not eligible under the Common Framework. Non\u2013 Paris Club and private creditors have a crucial role to play in ensuring coordinated, effective, and timely debt resolution processes.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 10, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 10 - }, - "text": "Restraining the pandemic: Global coordination is needed to resolve bottlenecks in the global distribution of vaccines and treatments. Public support for the development of new vaccine technologies and the design of systematic responses to future epidemics also remains essential. 
 Addressing debt distress: Progress has been made for countries that requested debt treatment under the Group of Twenty’s Common Framework initiative, and more will be needed to strengthen it. It is also necessary to agree on mechanisms to resolve debt in a broader set of economies, including middle-income countries that are not eligible under the Common Framework. Non– Paris Club and private creditors have a crucial role to play in ensuring coordinated, effective, and timely debt resolution processes.", - "type": "ListItem" + ] + } + } }, { + "type": "ListItem", "element_id": "21cc39455c68631dc614ef1de51e7185", + "text": "Strengthening global trade: Strengthening the global trading system would address risks associated with trade fragmentation. This can be achieved by rolling back restrictions on food exports and other essential items such as medicine, upgrading World Trade Organization (WTO) rules in critical areas such as agricultural and industrial subsidies, concluding and implementing new WTO-based agreements, and fully restoring the WTO dispute settlement system.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 10, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 10 - }, - "text": "Strengthening global trade: Strengthening the global trading system would address risks associated with trade fragmentation. 
This can be achieved by rolling back restrictions on food exports and other essential items such as medicine, upgrading World Trade Organization (WTO) rules in critical areas such as agricultural and industrial subsidies, concluding and implementing new WTO-based agreements, and fully restoring the WTO dispute settlement system.", - "type": "ListItem" + ] + } + } }, { + "type": "ListItem", "element_id": "fbeb9c12ae3fc66151947e15984ae9f9", + "text": "Using the global financial safety net: With the cascading of shocks to the global economy, using the global financial safety net to its fullest extent is appropriate, including by proactively utilizing the IMF\u2019s precautionary financial arrangements and channeling aid from the international community to low-income countries facing shocks.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 10, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 10 - }, - "text": "Using the global financial safety net: With the cascading of shocks to the global economy, using the global financial safety net to its fullest extent is appropriate, including by proactively utilizing the IMF’s precautionary financial arrangements and channeling aid from the international community to low-income countries facing shocks.", - "type": "ListItem" + ] + } + } }, { + "type": "ListItem", "element_id": "6281a28f343183e468644b742ca7ac42", + "text": "Speeding the green transition: To meet governments\u2019 climate change goals, it is necessary to swiftly implement credible mitigation policies. 
International coordination on carbon pricing or equivalent policies would facilitate faster decarbonization. Global cooperation is needed to build resilience to climate shocks, including through aid to vulnerable countries.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 10, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 10 - }, - "text": "Speeding the green transition: To meet governments’ climate change goals, it is necessary to swiftly implement credible mitigation policies. International coordination on carbon pricing or equivalent policies would facilitate faster decarbonization. Global cooperation is needed to build resilience to climate shocks, including through aid to vulnerable countries.", - "type": "ListItem" + ] + } + } }, { + "type": "NarrativeText", "element_id": "817afd40ff62023e9b9cbbe2e162eb2c", + "text": "Overall, financial stability risks remain elevated as investors reassess their inflation and monetary policy outlook. Global financial conditions have eased somewhat since the October 2022 Global Financial Stability Report, driven largely by changing market expectations regarding the interest rate cycle (Figure 1.1). While the expected peak in policy rates\u2014the terminal rate\u2014has risen, markets now also expect the subsequent fall in rates will be significantly faster, and further, than what was forecast in October (Figure 1.2). As a result, global bond yields have recently declined, corporate spreads have tightened, and equity markets have rebounded. 
That said, central banks are likely to continue to tighten monetary policy to fight inflation, and concerns that this restrictive stance could tip the economy into a recession have increased in major advanced economies.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 11, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "Overall, financial stability risks remain elevated as investors reassess their inflation and monetary policy outlook. Global financial conditions have eased somewhat since the October 2022 Global Financial Stability Report, driven largely by changing market expectations regarding the interest rate cycle (Figure 1.1). While the expected peak in policy rates—the terminal rate—has risen, markets now also expect the subsequent fall in rates will be significantly faster, and further, than what was forecast in October (Figure 1.2). As a result, global bond yields have recently declined, corporate spreads have tightened, and equity markets have rebounded. That said, central banks are likely to continue to tighten monetary policy to fight inflation, and concerns that this restrictive stance could tip the economy into a recession have increased in major advanced economies.", - "type": "NarrativeText" + ] + } + } }, { + "type": "Title", "element_id": "82b9f83158ab0fb180b75245a65ffcc7", + "text": "Figure 1.1. 
Global Financial Conditions: Selected Regions (Standard deviations from mean)", "metadata": { + "languages": [ + "eng" + ], + "page_number": 11, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "Figure 1.1. Global Financial Conditions: Selected Regions (Standard deviations from mean)", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "17e06a7f3549b931b73ad8398d412e5b", + "text": "7", "metadata": { + "languages": [ + "eng" + ], + "page_number": 11, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "7", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "d59416ccb396537639ece4cca2084c73", + "text": "6", "metadata": { + "languages": [ + "eng" + ], + "page_number": 11, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - 
}, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "6", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "a5202539d153c11d5a002ad3bbc0fe47", + "text": "5", "metadata": { + "languages": [ + "eng" + ], + "page_number": 11, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "5", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "7006097fe7f1b4cf807245eca95c7771", + "text": "4", "metadata": { + "languages": [ + "eng" + ], + "page_number": 11, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "4", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "96e9318ab9854527109a68dc48baca48", + "text": "United States Euro area China Other AEs Other EMs", "metadata": { + "languages": [ + "eng" + ], + "page_number": 11, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "United States Euro area China Other AEs Other EMs", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "dc28e8d24607e5f92be5c1c1ff90af32", + "text": "October 2022 GFSR", "metadata": { + "languages": [ + "eng" + ], + "page_number": 11, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "October 2022 GFSR", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "f2e8a84aabe1d52a4edbbce358faaf36", + "text": "3", "metadata": { + "languages": [ + "eng" + ], + "page_number": 11, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "3", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "8626d35b0cb3b1b6dc1572366542dd5a", + "text": "2", "metadata": { + "languages": [ + "eng" + ], + "page_number": 11, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "2", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "e5a6423ee78d22bc2fdb87bfece277f5", + "text": "1", "metadata": { + "languages": [ + "eng" + ], + "page_number": 11, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "1", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "5dc03d12a89afd2b304fbe7b99c4b8ed", + "text": "0", "metadata": { + "languages": [ + "eng" + ], + "page_number": 11, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "0", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "4db14dd20e75d725a0cf2358fef572f8", + "text": "\u20131", "metadata": { + "languages": [ + "eng" + ], + 
"page_number": 11, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "–1", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "e1598defa1c6474f629b090cf9306e4c", + "text": "\u20132", "metadata": { + "languages": [ + "eng" + ], + "page_number": 11, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "–2", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "9e131a3e0644e66f918f6f97bde789c4", + "text": "\u20133", "metadata": { + "languages": [ + "eng" + ], + "page_number": 11, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "–3", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", 
"element_id": "9bb03875fe5c003f96c4268352e61a97", + "text": "2006 08 08", "metadata": { + "languages": [ + "eng" + ], + "page_number": 11, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "2006 08 08", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "12aa2034416bfe586534a21cb9f556e3", + "text": "06", "metadata": { + "languages": [ + "eng" + ], + "page_number": 11, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "06", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "a18b380b9aba164e00b7fd9c69aba923", + "text": "10 10", "metadata": { + "languages": [ + "eng" + ], + "page_number": 11, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - 
"page_number": 11 - }, - "text": "10 10", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "88799b43be3b9864d75d79fef16a0c7c", + "text": "12 12", "metadata": { + "languages": [ + "eng" + ], + "page_number": 11, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "12 12", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "1c9dddc0d25f75f256f1c8b2ee22ad10", + "text": "14 16 14", "metadata": { + "languages": [ + "eng" + ], + "page_number": 11, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "14 16 14", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "c48ac6da781f1e0d69c4273c8d9ce84b", + "text": "16", "metadata": { + "languages": [ + "eng" + ], + "page_number": 11, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "16", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "585c5b84e2dca4edcb0ca9bcf35f8e67", + "text": "18 18", "metadata": { + "languages": [ + "eng" + ], + "page_number": 11, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "18 18", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "0bd058ad4300d52217d4e2b1353b544a", + "text": "20 22 22", "metadata": { + "languages": [ + "eng" + ], + "page_number": 11, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "20 22 22", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "fb18ecb78b4bec48bceb9322133ab576", + "text": "20", "metadata": { + "languages": [ + "eng" + ], + "page_number": 11, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "20", - "type": "UncategorizedText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "0d3b683fd3ccd5342a7e8aea51904aec", + "text": "Sources: Bloomberg Finance L.P.; Haver Analytics; national data sources; and IMF staff calculations. Note: AEs = advanced economies; EMs = emerging markets. GFSR = Global Financial Stability Report.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 11, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "Sources: Bloomberg Finance L.P.; Haver Analytics; national data sources; and IMF staff calculations. Note: AEs = advanced economies; EMs = emerging markets. GFSR = Global Financial Stability Report.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "c8ed4c00fbfc56ea7d279f80e997f89d", + "text": "Slowing aggregate demand and weaker-than-expected inflation prints in some major advanced economies have prompted investors\u2019 anticipation of a further reduction in the pace of future policy rate hikes. Corporate earnings forecasts have been cut due to headwinds from slowing demand, and margins have contracted across most regions. 
In addition, survey-based probabilities of recession have been increasing, particularly in the United States and Europe. However, upside risks to the inflation outlook remain. Despite the recent moderation in headline inflation, core inflation remains stubbornly high across most regions, labor markets are still tight, energy prices remain pressured by Russia\u2019s ongoing war in Ukraine, and supply chain disruptions may reappear. To keep these risks in check, financial conditions will likely need to tighten further. If not, central banks may need to increase policy rates even more in order to achieve their inflation objectives.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 11, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "Slowing aggregate demand and weaker-than-expected inflation prints in some major advanced economies have prompted investors’ anticipation of a further reduction in the pace of future policy rate hikes. Corporate earnings forecasts have been cut due to headwinds from slowing demand, and margins have contracted across most regions. In addition, survey-based probabilities of recession have been increasing, particularly in the United States and Europe. However, upside risks to the inflation outlook remain. Despite the recent moderation in headline inflation, core inflation remains stubbornly high across most regions, labor markets are still tight, energy prices remain pressured by Russia’s ongoing war in Ukraine, and supply chain disruptions may reappear. 
To keep these risks in check, financial conditions will likely need to tighten further. If not, central banks may need to increase policy rates even more in order to achieve their inflation objectives.", - "type": "NarrativeText" + ] + } + } }, { + "type": "Title", "element_id": "8c9033e2d8d9c6b66a1768394af8db26", + "text": "Figure 1.2. Market-Implied Expectations of Policy Rates (Percent)", "metadata": { + "languages": [ + "eng" + ], + "page_number": 11, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "Figure 1.2. Market-Implied Expectations of Policy Rates (Percent)", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "a7a04b7d4e340985edcd6bc1a5c64c63", + "text": "Latest", "metadata": { + "languages": [ + "eng" + ], + "page_number": 11, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "Latest", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "b2db4b018bb75781e9a4848e6ac7020e", + "text": "October 2022 GFSR", "metadata": { + "languages": [ + "eng" + ], + "page_number": 11, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "October 2022 GFSR", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "a6a09ad4549efb054488d676c5b490cb", + "text": "6", "metadata": { + "languages": [ + "eng" + ], + "page_number": 11, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "6", - "type": "UncategorizedText" + ] + } + } }, { + "type": "ListItem", "element_id": "fa2cba1da44db0b1fa1bb00f800c6795", + "text": "1. United States", "metadata": { + "languages": [ + "eng" + ], + "page_number": 11, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "1. United States", - "type": "ListItem" + ] + } + } }, { + "type": "ListItem", "element_id": "9cfac387ae387d235be367039604a36e", + "text": "2. 
Euro area", "metadata": { + "languages": [ + "eng" + ], + "page_number": 11, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "2. Euro area", - "type": "ListItem" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "3c2672ead6d57ab6b5b22d57f245f976", + "text": "5", "metadata": { + "languages": [ + "eng" + ], + "page_number": 11, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "5", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "1ba58c3f82c1fb1312e15c3fff6703d0", + "text": "4", "metadata": { + "languages": [ + "eng" + ], + "page_number": 11, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "4", - "type": "UncategorizedText" + ] + } 
+ } }, { + "type": "UncategorizedText", "element_id": "369401f148c4821034f650018de8de4b", + "text": "3", "metadata": { + "languages": [ + "eng" + ], + "page_number": 11, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "3", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "cdb00fe7da33524cbe17c35f2679fa7c", + "text": "2", "metadata": { + "languages": [ + "eng" + ], + "page_number": 11, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "2", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "41a215b52c22783ad939f71f80260a75", + "text": "1", "metadata": { + "languages": [ + "eng" + ], + "page_number": 11, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ 
- "eng" - ], - "page_number": 11 - }, - "text": "1", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "ec3717b3f83bcfd8a60b0e75d00e07d7", + "text": "Oct. 22", "metadata": { + "languages": [ + "eng" + ], + "page_number": 11, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "Oct. 22", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "d90675f2b6778ce1dc4f6ccd9e2113e8", + "text": "Apr. 23", "metadata": { + "languages": [ + "eng" + ], + "page_number": 11, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "Apr. 23", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "6a97de87bebc3677cc60b8d8824b659b", + "text": "Oct. 
23", "metadata": { + "languages": [ + "eng" + ], + "page_number": 11, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "Oct. 23", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "3de60d9aa96f23213828a39360f02e7a", + "text": "Dec. 24", "metadata": { + "languages": [ + "eng" + ], + "page_number": 11, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "Dec. 24", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "21464917ddc0313d760bfec9cefc829f", + "text": "Dec. 26", "metadata": { + "languages": [ + "eng" + ], + "page_number": 11, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "Dec. 
26", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "3bcbdced2467b06a02ac20d2c78db671", + "text": "Oct. 22", "metadata": { + "languages": [ + "eng" + ], + "page_number": 11, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "Oct. 22", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "d1b35d2d8c267463adb714fe51e33585", + "text": "Apr. 23", "metadata": { + "languages": [ + "eng" + ], + "page_number": 11, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "Apr. 23", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "db08b4d72f753cb226f380aae1af62d1", + "text": "Oct. 
23", "metadata": { + "languages": [ + "eng" + ], + "page_number": 11, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "Oct. 23", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "cd09bff65934ad4ac43826e3e744fc33", + "text": "Dec. 24", "metadata": { + "languages": [ + "eng" + ], + "page_number": 11, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "Dec. 24", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "40c47d43bea9b27ead13ee38bbc6e414", + "text": "Dec. 26", "metadata": { + "languages": [ + "eng" + ], + "page_number": 11, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "Dec. 
26", - "type": "Title" + ] + } + } }, { + "type": "NarrativeText", "element_id": "4fa05dfb7cb9e5d00c6d2b43204c1a69", + "text": "Given the tension between rising recession risks and monetary policy uncertainty, markets have seen significant volatility. While many central banks in advanced economies have stepped down the size of hikes, they have also explicitly stated they will need to keep rates higher, for a longer period of time, to tamp down inflation. Risk assets could face significant declines if earnings retrench further or if investors reassess their outlook for monetary policy given central bank communications. Globally, the partial reversal of the dollar rally has contributed to recent easing due to improved risk appetite, and some emerging market central banks have paused tightening amid tentative signs that inflation may have peaked.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 11, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "Given the tension between rising recession risks and monetary policy uncertainty, markets have seen significant volatility. While many central banks in advanced economies have stepped down the size of hikes, they have also explicitly stated they will need to keep rates higher, for a longer period of time, to tamp down inflation. Risk assets could face significant declines if earnings retrench further or if investors reassess their outlook for monetary policy given central bank communications. 
Globally, the partial reversal of the dollar rally has contributed to recent easing due to improved risk appetite, and some emerging market central banks have paused tightening amid tentative signs that inflation may have peaked.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "5816aa115baaf11bb3b7014437a46e47", + "text": "Sources: Bloomberg Finance L.P.; and IMF staff calculations. Note: GFSR = Global Financial Stability Report.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 11, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "Sources: Bloomberg Finance L.P.; and IMF staff calculations. Note: GFSR = Global Financial Stability Report.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "64b45e6c6df5b2e5d144288fb6f49cdc", + "text": "Financial market volatility is expected to remain elevated and could be exacerbated by poor market liquidity. For some asset classes (such as US Treasuries), liquidity has deteriorated to the March 2020 lows of the COVID-19 pandemic. 
With the process of central bank balance sheet reduction (quantitative tightening) underway, market liquidity is expected to remain challenging.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 11, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "Financial market volatility is expected to remain elevated and could be exacerbated by poor market liquidity. For some asset classes (such as US Treasuries), liquidity has deteriorated to the March 2020 lows of the COVID-19 pandemic. With the process of central bank balance sheet reduction (quantitative tightening) underway, market liquidity is expected to remain challenging.", - "type": "NarrativeText" + ] + } + } }, { + "type": "Footer", "element_id": "99877b265fdc550ba0cd79d4d3e67f2a", + "text": "WEO Update \u00a9 2023 \u2022 ISBN: 979-8-40023-224-4", "metadata": { + "languages": [ + "eng" + ], + "page_number": 11, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "WEO Update © 2023 • ISBN: 979-8-40023-224-4", - "type": "Footer" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "9b5a89c366bbe0d0abf345be9c5a0474", + "text": "5", "metadata": { 
+ "languages": [ + "eng" + ], + "page_number": 11, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "5", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "79c4b6dd670d03b006b2c3879215d542", + "text": "4", "metadata": { + "languages": [ + "eng" + ], + "page_number": 11, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "4", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "a20c76bdd7b3a8df1a94bc0d5e319632", + "text": "3", "metadata": { + "languages": [ + "eng" + ], + "page_number": 11, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "3", - "type": "UncategorizedText" + ] + } + } }, { + "type": 
"UncategorizedText", "element_id": "7c71092575ab5ef9bdcf5b71c20fd184", + "text": "2", "metadata": { + "languages": [ + "eng" + ], + "page_number": 11, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "2", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "a8914d7a8c63bac134b97e8ec92f0ceb", + "text": "1", "metadata": { + "languages": [ + "eng" + ], + "page_number": 11, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/2023-Jan-economic-outlook.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 11 - }, - "text": "1", - "type": "UncategorizedText" + ] + } + } } ] \ No newline at end of file diff --git a/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/s3/small-pdf-set/Silent-Giant-(1).pdf.json b/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/s3/Silent-Giant-(1).json similarity index 53% rename from test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/s3/small-pdf-set/Silent-Giant-(1).pdf.json rename to test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/s3/Silent-Giant-(1).json index 52ed2b7faf..ed4c7c534d 100644 --- 
a/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/s3/small-pdf-set/Silent-Giant-(1).pdf.json +++ b/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/s3/Silent-Giant-(1).json @@ -1,3922 +1,4314 @@ [ { + "type": "Title", "element_id": "c8c98233ed6ea3bb0e0196df3caf6fd0", + "text": "The Silent Giant", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "The Silent Giant", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "b515e497c821b1c9317f35cd14dcba56", + "text": "The need for nuclear in a clean energy system", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "The need for nuclear in a clean energy system", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "61dcf576097c58f62ae82967ec6528e3", + "text": "Executive Summary", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "Executive Summary", - "type": "Title" + ] + } + } }, { + "type": "NarrativeText", "element_id": "4ad0ebfdd0c5031abf1410d785c5fb8d", + "text": "In a world centred on short-term fixes, many of the traits that make nuclear energy a key player in the transition to a sustainable world are not properly valued and often taken for granted. Reflecting on the popular discourse in the world of energy politics it would seem that renewables, and renewables alone, will be responsible for, and capable of, delivering a zero-carbon energy system \u2013 and that it is just a matter of time.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "In a world centred on short-term fixes, many of the traits that make nuclear energy a key player in the transition to a sustainable world are not properly valued and often taken for granted. 
Reflecting on the popular discourse in the world of energy politics it would seem that renewables, and renewables alone, will be responsible for, and capable of, delivering a zero-carbon energy system – and that it is just a matter of time.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "13a0506c11e4b168cbe43eefa1cfd8b3", + "text": "The reality today is that both global carbon dioxide emissions and fossil fuel use are still on the rise. This does not only make the battle against climate change much harder, but also results in hundreds of thousands of pollution deaths every year.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "The reality today is that both global carbon dioxide emissions and fossil fuel use are still on the rise. This does not only make the battle against climate change much harder, but also results in hundreds of thousands of pollution deaths every year.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "d6f5f00c744da6f05c5a6019af118084", + "text": "Energy is the essential agent for promoting human development, and global demand is projected to increase significantly in the coming decades. 
Securing access to modern and affordable energy is essential for lifting people out of poverty, and for promoting energy independence and economic growth.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "Energy is the essential agent for promoting human development, and global demand is projected to increase significantly in the coming decades. Securing access to modern and affordable energy is essential for lifting people out of poverty, and for promoting energy independence and economic growth.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "cc18912a4626d26c5aec9cc160b35508", + "text": "Nuclear energy is a proven solution with a long and well-established track record. Nuclear reactors \u2013 a grand total of 445 in 30 countries \u2013 are the low-carbon backbone of electricity systems, operating in the background, day in and day out, often out of sight and out of mind. 
Capable of generating immense amounts of clean power, they are the silent giants upon which we rely daily.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "Nuclear energy is a proven solution with a long and well-established track record. Nuclear reactors – a grand total of 445 in 30 countries – are the low-carbon backbone of electricity systems, operating in the background, day in and day out, often out of sight and out of mind. Capable of generating immense amounts of clean power, they are the silent giants upon which we rely daily.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "d832caea40445d014c63847057712dd9", + "text": "Nuclear energy has shown \u2013 be it in France or Sweden \u2013 that it has the potential to be the catalyst for delivering sustainable energy transitions, long before climate change was on the agenda. 
The use of nuclear energy is the fast track to a high-powered and clean energy system, which not only delivers a healthier environment and an affordable supply of electricity, but also strengthens energy security and helps mitigate climate change.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "Nuclear energy has shown – be it in France or Sweden – that it has the potential to be the catalyst for delivering sustainable energy transitions, long before climate change was on the agenda. The use of nuclear energy is the fast track to a high-powered and clean energy system, which not only delivers a healthier environment and an affordable supply of electricity, but also strengthens energy security and helps mitigate climate change.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "cb21e65d859f5353e63ba5e58ee110cd", + "text": "The global nuclear industry, led by World Nuclear Association, is ready to take on the challenge. 
As part of the Harmony Programme, we have set a target to build an additional 1000GWe of reactors across the world before 2050, bringing the global share of electricity production of nuclear to 25%.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "The global nuclear industry, led by World Nuclear Association, is ready to take on the challenge. As part of the Harmony Programme, we have set a target to build an additional 1000GWe of reactors across the world before 2050, bringing the global share of electricity production of nuclear to 25%.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "b309f2b080b879e4757fd9987176fcda", + "text": "In order to realise the full potential of nuclear energy we have identified three key areas where actions are required:", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "In order to realise the full potential of nuclear energy we have identified three key areas where actions are required:", - "type": "NarrativeText" + ] + } + } }, { + "type": "ListItem", 
"element_id": "eac2c33300b969c622a593c21dccf74a", + "text": "The need to create a level playing field that values reliability and energy security", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "The need to create a level playing field that values reliability and energy security", - "type": "ListItem" + ] + } + } }, { + "type": "ListItem", "element_id": "e3e3cc9e2ab32f5f30d3020617359be1", + "text": "The need for harmony in the nuclear regulatory environment", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "The need for harmony in the nuclear regulatory environment", - "type": "ListItem" + ] + } + } }, { + "type": "ListItem", "element_id": "7afd40354511a018573113e2bde4cb05", + "text": "The need for a holistic safety paradigm for the whole electricity system.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, 
"permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "The need for a holistic safety paradigm for the whole electricity system.", - "type": "ListItem" + ] + } + } }, { + "type": "Footer", "element_id": "3f217634181cfb17adbc6c9dcf4dd580", + "text": "1", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "1", - "type": "Footer" + ] + } + } }, { + "type": "Footer", "element_id": "dc3c4d9a725b0ead89311bb08bd251ae", + "text": "2", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "2", - "type": "Footer" + ] + } + } }, { + "type": "Title", "element_id": "107793a98ed713604c924115b1353a5d", + "text": "The drivers for a clean energy system", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "The drivers for a clean energy system", - "type": "Title" + ] + } + } }, { + "type": "NarrativeText", "element_id": "38ff5b395d02c9be0a73a2bb9d38573b", + "text": "Electricity is central to modern life \u2013 it powers our daily lives, as well as our dreams and ambitions. Demand has grown steadily for more than 100 years, and will continue to do so as many parts of the world continue to develop, and electrification takes a central role in efforts to decarbonize (Figure 1). With nearly a billion people around the world still living in the dark, without access to electricity, humanity has a responsibility to learn from the past - everyone has the right to enjoy a modern lifestyle in a way that does not cause harm to people or the planet.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "Electricity is central to modern life – it powers our daily lives, as well as our dreams and ambitions. Demand has grown steadily for more than 100 years, and will continue to do so as many parts of the world continue to develop, and electrification takes a central role in efforts to decarbonize (Figure 1). 
With nearly a billion people around the world still living in the dark, without access to electricity, humanity has a responsibility to learn from the past - everyone has the right to enjoy a modern lifestyle in a way that does not cause harm to people or the planet.", - "type": "NarrativeText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "a660b575fbee999778778a89afa60134", + "text": "45,000", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "45,000", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "7fbf9ce5b3e014b9217b60aec312c0df", + "text": "\uf067 Marine", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": " Marine", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "ab2dd368258a30cd790b0f278ca64b45", + "text": "40,000", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "40,000", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "ab9d4b26b6cc1c74560ef32258b482a6", + "text": "\uf067 CSP", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": " CSP", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "018c17c3d50d7f936591ca4a135b2af9", + "text": "35,000", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "35,000", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "e2166f70bfd45db57564390fede60255", + "text": "\uf067 Solar PV", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", 
"data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": " Solar PV", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "55bbc2823bb9b4db0b01cac90f603b03", + "text": "\uf067 Geothermal", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": " Geothermal", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "2d9ba3bb0408eff5ce2bcbcccec7fa66", + "text": "30,000", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "30,000", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "ec4886784cf7703b8ad97506079065f7", + "text": "\uf067 Wind", "metadata": { + "languages": [ + "eng" + ], + 
"page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": " Wind", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "b14fc79a3d03a5cbdf6abc18f3a4f406", + "text": "h W T", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "h W T", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "5c8d0a0b6a1a91be7814eafa0d4c2a55", + "text": "25,000", "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 4 - }, - "text": "25,000", - "type": "UncategorizedText" + "page_number": 4, + "filetype": "application/pdf", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } }, { + "type": "Title", "element_id": "000768c3bfe644b59a14982ca587315c", + "text": 
"\uf067 Bioenergy", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": " Bioenergy", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "7483f431fd145b902c0454f17eea5fff", + "text": "20,000", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "20,000", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "393eb6a42d9ae33b825d9aca55b79679", + "text": "\uf067 Hydro", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": " Hydro", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": 
"b91600c968049790b84c51288b31b0f7", + "text": "\uf067 Nuclear", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": " Nuclear", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "346505ac03616a58cc7c4abf485c452b", + "text": "15,000", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "15,000", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "e8355b0686ccef1e878dcaabc3a30042", + "text": "\uf067 Gas", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": " Gas", - "type": "Title" + ] + } + } }, { + 
"type": "UncategorizedText", "element_id": "a9cf5056beb58b487c9ff5b6cacaef88", + "text": "10,000", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "10,000", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "add337cb39f238214c4b6f95d63a50aa", + "text": "\uf067 Oil", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": " Oil", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "84f76f8a45befa15e2a8594a82b51ce5", + "text": "5,000", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "5,000", - "type": 
"UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "e183dcd03665affc8bb30c9243134477", + "text": "\uf067 Coal", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": " Coal", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "e5bac97bd6fb9085890bc5b922cf6cdc", + "text": "0", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "0", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "09c37129095e328d550dc34c1897927b", + "text": "2000", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, 
- "text": "2000", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "e2cf5fe276794c7bd909bcc96bba4536", + "text": "2010", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "2010", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "c9390d70d0478eb20c150a4c5b303018", + "text": "2020", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "2020", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "e3ebf03a084fae2fb4476807369ee5ae", + "text": "2030", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - 
"languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "2030", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "c8e4c34df341ee19aef0cf820f0c1350", + "text": "2040", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "2040", - "type": "UncategorizedText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "481a5ecc274da99b54c336f4e04552e1", + "text": "Figure 1. IEA projected electricity production and sources to 2040 i", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "Figure 1. IEA projected electricity production and sources to 2040 i", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "f39647014ac6e4c086e589ab4f213868", + "text": "The challenge before us, however, goes far beyond just electricity \u2013 we will need to find ways to decarbonize all parts of the economy, and we need solutions that are sustainable in the long-term. 
That means changing the way we heat our homes and power our industrial processes, as well as ensuring that the way we travel, export our products and ship our food moves away from fossil fuels.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "The challenge before us, however, goes far beyond just electricity – we will need to find ways to decarbonize all parts of the economy, and we need solutions that are sustainable in the long-term. That means changing the way we heat our homes and power our industrial processes, as well as ensuring that the way we travel, export our products and ship our food moves away from fossil fuels.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "0da6a811fd91b74190a25523af108b7e", + "text": "Despite the very considerable efforts to decarbonize the economy and the countless billions spent, our world remains heavily addicted to fossil fuels. The trend is clear \u2013 instead of reducing our dependence on fossil fuels, we are increasing it (Figure 2). 
As a direct result, greenhouse gas emissions continue to rise when they need to drastically fall.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "Despite the very considerable efforts to decarbonize the economy and the countless billions spent, our world remains heavily addicted to fossil fuels. The trend is clear – instead of reducing our dependence on fossil fuels, we are increasing it (Figure 2). As a direct result, greenhouse gas emissions continue to rise when they need to drastically fall.", - "type": "NarrativeText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "d6ff5d09e3ea7dccfdcc62f58dc6b827", + "text": "30,000,000", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "30,000,000", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "2a26a49470422616eb2789804f5c8e76", + "text": "\uf067 High-carbon \uf067 Low-carbon", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": " High-carbon  Low-carbon", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "4b3535a170470607e2fbe21d0c63f5c6", + "text": "25,000,000", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "25,000,000", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "515b512a7836bf9a5f1eb21c23d6f296", + "text": "20,000,000", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "20,000,000", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "3f1d6f3e961851f438d705cc2cce053c", + "text": "h W G", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + 
"filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "h W G", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "86b3a26fcd28726867ad9906a6af85f9", + "text": "15,000,000", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "15,000,000", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "e3c1be8ee65c41d9b3733929643c4d73", + "text": "10,000,000", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "10,000,000", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "0c932a08c1c4c06fcd107c442b6eae62", + "text": 
"5,000,000", "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 4 - }, - "text": "5,000,000", - "type": "UncategorizedText" + "page_number": 4, + "filetype": "application/pdf", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } }, { + "type": "UncategorizedText", "element_id": "3389ba15aa1907b5de33bdc04db6a62c", + "text": "0", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "0", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "1d368cf080260ebba14634805b92467f", + "text": "1990", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "1990", - "type": "UncategorizedText" + ] + } + } 
}, { + "type": "UncategorizedText", "element_id": "f27d0d245414b18e56f59b8b37084478", + "text": "1995", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "1995", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "fe4d81b2a6dd5bb8d779c89d3da1c344", + "text": "2000", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "2000", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "3b19037a95da4a090f2a2e8dc59202c6", + "text": "2005", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": 
"2005", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "22990408e2240ba88b0652b808f31bac", + "text": "2010", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "2010", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "a1a68e141590e6d1177e11de0911b5b4", + "text": "2015", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "2015", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "a00e9275ff4a7c8f3db22aac5ad8b4aa", + "text": "Figure 2. 
Worldwide electricity generation by fuel (1990-2016)ii", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "Figure 2. Worldwide electricity generation by fuel (1990-2016)ii", - "type": "Title" + ] + } + } }, { + "type": "NarrativeText", "element_id": "bfd27a5d99fab08b146fa729a46eb575", + "text": "We need to deliver a worldwide transformation that is socially, economically and environmentally sustainable. We need a system that is affordable \u2013 no one should have to choose between heating their home, and essentials like eating \u2013 as well as helping to alleviate poverty, and ensure the realization of human potential globally. 
We need a power source that can not only help us mitigate the effects of climate change and environmental degradation, but can also help bring the enormous benefits of reliable electricity supply to the corners of the world that do not have access to it.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 5, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "We need to deliver a worldwide transformation that is socially, economically and environmentally sustainable. We need a system that is affordable – no one should have to choose between heating their home, and essentials like eating – as well as helping to alleviate poverty, and ensure the realization of human potential globally. We need a power source that can not only help us mitigate the effects of climate change and environmental degradation, but can also help bring the enormous benefits of reliable electricity supply to the corners of the world that do not have access to it.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "7d04fe32568029462cb22a1d00c986f6", + "text": "Nuclear energy is already making a major contribution. By using nuclear energy rather than fossil fuels, we currently avoid the emission of more than 2500 million tonnes of carbon dioxide every year. 
To put that into perspective, it is the equivalent of removing about 400 million cars from the world\u2019s roads.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 5, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "Nuclear energy is already making a major contribution. By using nuclear energy rather than fossil fuels, we currently avoid the emission of more than 2500 million tonnes of carbon dioxide every year. To put that into perspective, it is the equivalent of removing about 400 million cars from the world’s roads.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "14146777115f269ab7558c94e44ef099", + "text": "Modern society is dependent on the steady supply of electricity, every day of the year \u2013 regardless of weather, season or time of day \u2013 and nuclear energy is particularly well-suited to providing this service. Given that the majority of baseload supply is fossil-based, an increase in the use of nuclear energy would result in a rapid decarbonization of the electricity system. 
The International Energy Agency\u2019s (IEA) recent report III on nuclear energy highlighted the importance of dependable baseload electricity generators and the need to properly value and compensate them for the electricity security and reliability services they provide.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 5, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "Modern society is dependent on the steady supply of electricity, every day of the year – regardless of weather, season or time of day – and nuclear energy is particularly well-suited to providing this service. Given that the majority of baseload supply is fossil-based, an increase in the use of nuclear energy would result in a rapid decarbonization of the electricity system. 
The International Energy Agency’s (IEA) recent report III on nuclear energy highlighted the importance of dependable baseload electricity generators and the need to properly value and compensate them for the electricity security and reliability services they provide.", - "type": "NarrativeText" + ] + } + } }, { + "type": "Footer", "element_id": "96be145e939597ad5626c1f24a170598", + "text": "3", "metadata": { + "languages": [ + "eng" + ], + "page_number": 5, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "3", - "type": "Footer" + ] + } + } }, { + "type": "Footer", "element_id": "3ef3ab436a1c66d6c7aa00cdcfc40873", + "text": "4", "metadata": { + "languages": [ + "eng" + ], + "page_number": 6, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 6 - }, - "text": "4", - "type": "Footer" + ] + } + } }, { + "type": "NarrativeText", "element_id": "c73abd40dd196cc77bb03f2567f46b03", + "text": "Despite impressive recent growth, the stark reality is that renewables alone will not be able to resolve our dependence on fossil fuels. 
Clearly, the sun does not always shine, and the wind does not always blow, and this is compounded by the fact that many times these periods coincide with when electricity demand is at its highest, but renewables can be complementary to nuclear energy. Storage solutions, such as batteries, will not be able to power our societies for days or weeks when the weather is not favourable. Natural gas is currently the most used solution for the intermittency problem, which only serves to reinforce our economy\u2019s dependence of fossil fuels, and severely undermines the apparently \u2018green credentials\u2019 of many renewables.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 6, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 6 - }, - "text": "Despite impressive recent growth, the stark reality is that renewables alone will not be able to resolve our dependence on fossil fuels. Clearly, the sun does not always shine, and the wind does not always blow, and this is compounded by the fact that many times these periods coincide with when electricity demand is at its highest, but renewables can be complementary to nuclear energy. Storage solutions, such as batteries, will not be able to power our societies for days or weeks when the weather is not favourable. 
Natural gas is currently the most used solution for the intermittency problem, which only serves to reinforce our economy’s dependence of fossil fuels, and severely undermines the apparently ‘green credentials’ of many renewables.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "f7078a4205d6d8b21783483c5b426c02", + "text": "Moving to a sustainable future", "metadata": { + "languages": [ + "eng" + ], + "page_number": 6, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 6 - }, - "text": "Moving to a sustainable future", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "62b6b9ec7e962db41cf8818f42b2bc4d", + "text": "The Intergovernmental Panel on Climate Change (IPCC) special report on Global Warming of 1.5\u00b0C iv examined a large number of different scenarios for limiting global warming to 1.5\u00b0C. Of those scenarios which would achieve the 1.5\u00b0C target, the mean increase in nuclear energy\u2019s contribution to electricity production was 2.5 times higher compared to today. 
However, the \u2018middle-of-the-road\u2019 scenario \u2013 in which social, economic, and technological trends follow current patterns and would not require major changes to, for example, diet and travel habits \u2013 sees the need for nuclear increase by five times globally by 2050.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 6, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 6 - }, - "text": "The Intergovernmental Panel on Climate Change (IPCC) special report on Global Warming of 1.5°C iv examined a large number of different scenarios for limiting global warming to 1.5°C. Of those scenarios which would achieve the 1.5°C target, the mean increase in nuclear energy’s contribution to electricity production was 2.5 times higher compared to today. However, the ‘middle-of-the-road’ scenario – in which social, economic, and technological trends follow current patterns and would not require major changes to, for example, diet and travel habits – sees the need for nuclear increase by five times globally by 2050.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "2673340156781d2b4e8c53921e661b1d", + "text": "The IEA has concluded that without an expanded contribution from nuclear energy, the already huge challenge of achieving emissions reductions will become drastically harder and more costly. In their latest report on nuclear energy v, published in 2019, they also conclude that not using nuclear would have negative implications for energy security and result in higher costs for the consumers. 
The IEA recommends policy reforms to \u2018\u2026 ensure competition on a level playing field\u2019 and that the \u2018\u2026 focus should be on designing electricity markets in a way that values the clean energy and energy security attributes of low-carbon technologies, including nuclear power.\u2019 Such reforms should also ensure that reliability of electricity production is properly valued and compensated.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 6, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 6 - }, - "text": "The IEA has concluded that without an expanded contribution from nuclear energy, the already huge challenge of achieving emissions reductions will become drastically harder and more costly. In their latest report on nuclear energy v, published in 2019, they also conclude that not using nuclear would have negative implications for energy security and result in higher costs for the consumers. 
The IEA recommends policy reforms to ‘… ensure competition on a level playing field’ and that the ‘… focus should be on designing electricity markets in a way that values the clean energy and energy security attributes of low-carbon technologies, including nuclear power.’ Such reforms should also ensure that reliability of electricity production is properly valued and compensated.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "8f9b23ea4d2d725ef37c7766f75d8b5d", + "text": "As part of the Harmony Programme, the world\u2019s nuclear industry has identified three key policy areas for action to unlock the true potential of nuclear energy - the need for a level playing field, the harmonization of regulations and the establishment of an effective safety paradigm.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 6, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 6 - }, - "text": "As part of the Harmony Programme, the world’s nuclear industry has identified three key policy areas for action to unlock the true potential of nuclear energy - the need for a level playing field, the harmonization of regulations and the establishment of an effective safety paradigm.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "a24ec4b8d4de88ccfcb84af80a57d36d", + "text": "In regard to the need for a level playing field, we see that many of the world\u2019s electricity markets operate in an unsustainable fashion, dominated by short-term thinking. 
Electricity supply which is affordable, reliable and available 24/7 generates broad societal benefits, and as seen in Figure 3, nuclear is one of the most affordable electricity sources.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 6, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 6 - }, - "text": "In regard to the need for a level playing field, we see that many of the world’s electricity markets operate in an unsustainable fashion, dominated by short-term thinking. Electricity supply which is affordable, reliable and available 24/7 generates broad societal benefits, and as seen in Figure 3, nuclear is one of the most affordable electricity sources.", - "type": "NarrativeText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "6e1f3f8112231981de3a277cfec5feae", + "text": "300", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "300", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "54c9cd9bc7bfd6c2ba77b3497b51f5c6", + "text": "250", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", 
"data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "250", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "ab87c3689885cc045997cfe5facb60fe", + "text": "200", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "200", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "3e5270686241550d3f032d4f84ca91c2", + "text": "h W M / $", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "h W M / $", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "4f0ba3d67a4681e87255ff671897167e", + "text": "150", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + 
"filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "150", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "eecc9fb044dcf92ea58e0ce6af4a7d5b", + "text": "100", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "100", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "d0d6d5bb3dc5f49af9cc3f88d299e6b4", + "text": "50", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "50", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "ba5217df07db5df14c01a2cf84b6f718", + "text": "0", "metadata": { - 
"data_source": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "0", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "c26fee623fcd10afd52ff555ab8f77e2", + "text": "m ercial Photovoltaic", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "m ercial Photovoltaic", - "type": "Title" + ] + } + } }, { + "type": "NarrativeText", "element_id": "4d25c512cffa22bac1885ffcd60d939c", + "text": "C o m", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "C o m", - "type": "NarrativeText" + ] + } + } }, { + "type": "Title", "element_id": 
"a4b35c9ff2f82a78bcbf4cabe854b51b", + "text": "O nshore Wind", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "O nshore Wind", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "3ed71677bbefe2b7f2dc6502524f5d60", + "text": "Offshore Wind", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "Offshore Wind", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "4d0c562213e072fcd0a3b8d969ffee0b", + "text": "N uclear", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "N uclear", - "type": "Title" + ] + } + } }, { + "type": 
"Title", "element_id": "ed2d3166d434f351f3d95e5a05fef431", + "text": "C C G T", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "C C G T", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "21eeca8151ed603ffedd8cffbdf472d0", + "text": "C oal", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "C oal", - "type": "Title" + ] + } + } }, { + "type": "NarrativeText", "element_id": "cda6b904f5bb73a58099d057a36a0268", + "text": "Figure 3. 
Comparative cost projections for main electricity generators vi", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "Figure 3. Comparative cost projections for main electricity generators vi", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "c91a17649a07a300b3eab7239a425cb0", + "text": "However, markets fail to give due credit to electricity generators, such as nuclear energy, that are able to meet these societal demands. This has resulted in situations where nuclear energy has struggled to compete with energy sources that have been subsidized, do not pay the hidden costs brought on by their intermittency (e.g. costly backup provisions and investments in the grid), or do not have to take responsibility for using our common atmosphere as a dumping ground.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "However, markets fail to give due credit to electricity generators, such as nuclear energy, that are able to meet these societal demands. 
This has resulted in situations where nuclear energy has struggled to compete with energy sources that have been subsidized, do not pay the hidden costs brought on by their intermittency (e.g. costly backup provisions and investments in the grid), or do not have to take responsibility for using our common atmosphere as a dumping ground.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "455d6f69058fd7feea9d680bfa2f903b", + "text": "Additionally, electricity markets fail to recognize the relative costs of different forms of electricity generation. Whilst the nuclear industry takes responsibility for its lifecycle costs (including decommissioning and waste management), other electricity generators do not. Fossil fuel generators are rarely required to pay the price in line with the environmental and health damage that their emissions cause, whilst the cost of wind and solar does not include the disposal of the sometimes toxic materials at the end of their life.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "Additionally, electricity markets fail to recognize the relative costs of different forms of electricity generation. Whilst the nuclear industry takes responsibility for its lifecycle costs (including decommissioning and waste management), other electricity generators do not. 
Fossil fuel generators are rarely required to pay the price in line with the environmental and health damage that their emissions cause, whilst the cost of wind and solar does not include the disposal of the sometimes toxic materials at the end of their life.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "850da52352d6ac51fccfa2bdc124ce95", + "text": "In regard to the need to harmonize regulations, multiple regulatory barriers stemming from diverse national licensing processes and safety requirements currently limit global nuclear trade and investment. A lack of international standardization places unnecessary regulatory burdens on nuclear activities and causes delays in the licensing of new designs, hindering innovation.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "In regard to the need to harmonize regulations, multiple regulatory barriers stemming from diverse national licensing processes and safety requirements currently limit global nuclear trade and investment. 
A lack of international standardization places unnecessary regulatory burdens on nuclear activities and causes delays in the licensing of new designs, hindering innovation.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "1aa099868ccbd0c906392b4040b51477", + "text": "The International Atomic Energy Agency (IAEA) has highlighted the importance of addressing this issue, concluding that the lack of regulatory harmony \u2018\u2026causes many drawbacks for the entire nuclear industry, including developers, vendors, operators and even regulators themselves\u2026This results in increased costs and reduced predictability in project execution\u2019. vii It is therefore crucial that we harmonize the regulatory process to address these weaknesses, and avoid unnecessary duplication and inconsistencies.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "The International Atomic Energy Agency (IAEA) has highlighted the importance of addressing this issue, concluding that the lack of regulatory harmony ‘…causes many drawbacks for the entire nuclear industry, including developers, vendors, operators and even regulators themselves…This results in increased costs and reduced predictability in project execution’. 
vii It is therefore crucial that we harmonize the regulatory process to address these weaknesses, and avoid unnecessary duplication and inconsistencies.", - "type": "NarrativeText" + ] + } + } }, { + "type": "Footer", "element_id": "0a594689423087ebb4fa5960386f9c79", + "text": "5", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "5", - "type": "Footer" + ] + } + } }, { + "type": "Footer", "element_id": "eda2d3adf2e4a1f9064252ed95826bf6", + "text": "6", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "6", - "type": "Footer" + ] + } + } }, { + "type": "NarrativeText", "element_id": "2ee71305ea2aeb159dce93675cb959d8", + "text": "In regard to the need for a holistic safety paradigm for the whole electricity system, we need to consider safety from a societal perspective, something the current energy system fails to do. The health, environmental and safety benefits of nuclear energy are not sufficiently understood and valued when compared with other electricity sources. 
Nuclear energy remains the safest form of electricity generation (Figure 4). Additionally, the use of nuclear consistently prevents many tens of thousands of deaths (mainly resulting from air pollution) every year by avoiding the use of coal - lifesaving measures which must be better recognised and valued.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "In regard to the need for a holistic safety paradigm for the whole electricity system, we need to consider safety from a societal perspective, something the current energy system fails to do. The health, environmental and safety benefits of nuclear energy are not sufficiently understood and valued when compared with other electricity sources. Nuclear energy remains the safest form of electricity generation (Figure 4). 
Additionally, the use of nuclear consistently prevents many tens of thousands of deaths (mainly resulting from air pollution) every year by avoiding the use of coal - lifesaving measures which must be better recognised and valued.", - "type": "NarrativeText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "c4ea2d62a084a0ed632f56396b2b93d8", + "text": "140", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "140", - "type": "UncategorizedText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "ff300ef7a936a82104bf89668428c28c", + "text": "r a e y", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "r a e y", - "type": "NarrativeText" + ] + } + } }, { + "type": "Title", "element_id": "79f841e1d37ecfd8784ae973690d1ec5", + "text": "e", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ 
{ "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "e", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "7134cee10997fe9e51fad1015727eff1", + "text": "120", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "120", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "12fccf8e709ce3a4f1f933c95073c106", + "text": "100", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "100", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "77ffbf6cf6036f74cd3769484f249fa7", + "text": "120", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "120", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "2c2ff48ef301a979bf2bfbddc3ec468e", + "text": "99.5", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "99.5", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "2564c9377fc604401ffe06ea1749d6c0", + "text": "W T", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "W T", - "type": "Title" + ] + } + } }, { + "type": "NarrativeText", "element_id": "c6310389f5d96fa9b491f689db571510", + "text": "r e p s e i t i l", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { 
+ "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "r e p s e i t i l", - "type": "NarrativeText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "e1bf585fc859d0af7217b1b7a1662c21", + "text": "80", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "80", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "5b6cbda9e4489c71563c5f7b0127ae05", + "text": "60", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "60", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "2f28ee79fab311267d2a6421859df000", + "text": "71.9", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, 
+ "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "71.9", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "c931e8cef0651941ebd82cdb01032297", + "text": "a t a F", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "a t a F", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "b14c6f175802127e70c1b67b38bc8a96", + "text": "40", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "40", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "7f4686887f4d871ecb0fa4d870e7948a", + "text": "20", "metadata": { + "languages": [ + "eng" 
+ ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "20", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "1e55e0a54601e3a09fb8d79fb7dd71e0", + "text": "0", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "0", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "3e387bcaf80b2e97608ff7405a1cd816", + "text": "C oal", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "C oal", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "98ee9432927178f265747962d54bc1a8", + "text": "Oil", "metadata": { + "languages": [ + 
"eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "Oil", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "6dc6732e023c9802de9dce0787fd9055", + "text": "N atural gas", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "N atural gas", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "c1b0f9265bacb2300cdfa38133aa33bb", + "text": "8.5", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "8.5", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "c2556aabf13e472b78a76d3fe2220daa", + "text": "1.78", 
"metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "1.78", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "81e2d689f2e94254cf5c009c2f5567d9", + "text": "Offshore wind", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "Offshore wind", - "type": "Title" + ] + } + } }, { + "type": "NarrativeText", "element_id": "4811d56c0b68922b802ebadbc000914b", + "text": "O nshore wind (G erm any)", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "O nshore wind (G erm any)", - "type": "NarrativeText" + ] + } + } }, { + "type": 
"UncategorizedText", "element_id": "c24c44cc729dd2edacd19c1e2e03b07d", + "text": "(U K)", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "(U K)", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "8b432a8f4140e37ff1cbc876d19a35b6", + "text": "0.245", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "0.245", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "bb48cb6392f3b5467896fbe2c95a00bb", + "text": "S olar P V", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "S olar P V", - 
"type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "c6a71b10435b94d50a987241cc469323", + "text": "<0.01", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "<0.01", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "a921b39f8f6cb14ebd1457bc0289d95a", + "text": "N uclear*", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "N uclear*", - "type": "Title" + ] + } + } }, { + "type": "NarrativeText", "element_id": "2129a4c9eabab988c97099d52b51a174", + "text": "Figure 4. 
Comparison of number of fatalities due to electricity generation viii", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "Figure 4. Comparison of number of fatalities due to electricity generation viii", - "type": "NarrativeText" + ] + } + } }, { + "type": "Title", "element_id": "ebd40c3b3605600f88963aced0ad4620", + "text": "Nuclear for a sustainable tomorrow", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "Nuclear for a sustainable tomorrow", - "type": "Title" + ] + } + } }, { + "type": "NarrativeText", "element_id": "c989d0dc46738087cc7424063e84cf26", + "text": "Nuclear energy is already making a significant contribution to providing the world with clean and abundant electricity, and has a proven track record of being a reliable workhorse around the world. Countries like France, Sweden and Switzerland have proven that it is possible to divorce economic growth from an increase in damaging emissions and over the timescales required to effectively challenge climate change and environmental degradation (Figures 5 and 6). 
Nuclear can ensure that fast-growing populations achieve rising standards of living \u2013 without having to sacrifice the planet or their own well-being.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "Nuclear energy is already making a significant contribution to providing the world with clean and abundant electricity, and has a proven track record of being a reliable workhorse around the world. Countries like France, Sweden and Switzerland have proven that it is possible to divorce economic growth from an increase in damaging emissions and over the timescales required to effectively challenge climate change and environmental degradation (Figures 5 and 6). 
Nuclear can ensure that fast-growing populations achieve rising standards of living – without having to sacrifice the planet or their own well-being.", - "type": "NarrativeText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "ddb0d710d848e8f8df56b7f3e69eb6b8", + "text": "100", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "100", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "cf044769f9de1e6d8c0a981fbd7a3da7", + "text": "90", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "90", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "cd7df179335324e1a0e466279f48ae63", + "text": "\uf067 Coal", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": " Coal", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "c3b6d85ec091b32a5a442e07231a333b", + "text": "\uf067 Gas/Oil", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": " Gas/Oil", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "9523389fe3155ad2259f83e3b8200373", + "text": "80", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "80", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "c01604a9226a1a32b6b1b56d08aedd38", + "text": "\uf067 Biofuels/Waste", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ 
{ "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": " Biofuels/Waste", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "5e2fc4bbc3832e270bb969b4dc9434e1", + "text": "70", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "70", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "7fccedb36e73ad8636edac2f103b6aee", + "text": "\uf067 Wind/Solar", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": " Wind/Solar", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "86f38d2ae76a40ed5b03ea5f086057f1", + "text": "60", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "60", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "79453617a52b7992901d79f3f4692643", + "text": "\uf067 Hydro", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": " Hydro", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "a7b3a0785b67d12ad97c594b8d07e6dc", + "text": "\uf067 Nuclear", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": " Nuclear", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "842ac98b78ba8f79b510fe3b284d928c", + "text": "%", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + 
"record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "%", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "294f4d4cf217dd488bf79f384488f96a", + "text": "50", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "50", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "5f37fd22791466991d3024d52eec5f37", + "text": "40", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "40", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "095709f8c89c38ba5722fc3858758525", + "text": "30", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": 
"application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "30", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "4fedff45b0b8f28db1fb10260e5e78f6", + "text": "20", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "20", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "ca8826a935978c3f9dd4705cae8d4d7d", + "text": "10", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "10", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "f04002ddb446765596f434d01700112b", + "text": "0", "metadata": { + "languages": [ + "eng" + 
], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "0", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "e82c13f4f603dbc0ee29c3c21153985e", + "text": "France", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "France", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "fbe241547e7f872d3e2542272811a674", + "text": "Sweden", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "Sweden", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "19ea26090b9f025ebd8c3f43cf461a07", + "text": "Switzerland", "metadata": { + "languages": [ + "eng" 
+ ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "Switzerland", - "type": "Title" + ] + } + } }, { + "type": "NarrativeText", "element_id": "0cbf95c8cb1b954966854d62ff8a1ea2", + "text": "Figure 5. The importance of nuclear in ensuring clean energy systems in France, Sweden and Switzerland ix", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "Figure 5. 
The importance of nuclear in ensuring clean energy systems in France, Sweden and Switzerland ix", - "type": "NarrativeText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "a8eb08ee2854057ae4434ca03daeb79a", + "text": "600", "metadata": { + "languages": [ + "eng" + ], + "page_number": 9, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 9 - }, - "text": "600", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "f8065cb2b23e76b8c3e7dcf93c401023", + "text": "500", "metadata": { + "languages": [ + "eng" + ], + "page_number": 9, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 9 - }, - "text": "500", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "62c5e65a1c5aabcf4d1a1259867eb6cd", + "text": "\uf067 Non-hydro", "metadata": { + "languages": [ + "eng" + ], + "page_number": 9, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 9 - }, - "text": " Non-hydro", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "b69fc7b1c11cf81a647a4a24b59c6b2f", + "text": "ren. & waste", "metadata": { + "languages": [ + "eng" + ], + "page_number": 9, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 9 - }, - "text": "ren. & waste", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "9abc83714a45fcd57974bd776ce74ba6", + "text": "400", "metadata": { + "languages": [ + "eng" + ], + "page_number": 9, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 9 - }, - "text": "400", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "7cb835e17bbf39d5213ad5737c285c69", + "text": "\uf067 Nuclear", "metadata": { + "languages": [ + "eng" + ], + "page_number": 9, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ 
{ "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 9 - }, - "text": " Nuclear", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "56640b4ee5f3529fac804655f09114a3", + "text": "\uf067 Natural gas", "metadata": { + "languages": [ + "eng" + ], + "page_number": 9, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 9 - }, - "text": " Natural gas", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "214bfa55aa338564f52ae3cdd1d147e7", + "text": "h W T", "metadata": { + "languages": [ + "eng" + ], + "page_number": 9, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 9 - }, - "text": "h W T", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "1c7ccebe77f5e4a4abf0b3ab628d2175", + "text": "300", "metadata": { + "languages": [ + "eng" + ], + "page_number": 9, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, 
"permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 9 - }, - "text": "300", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "d2a6a274b47457efeef356e55f7f5da0", + "text": "\uf067 Hydro", "metadata": { + "languages": [ + "eng" + ], + "page_number": 9, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 9 - }, - "text": " Hydro", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "cfac337822be83fa364ebd1ac2962796", + "text": "200", "metadata": { + "languages": [ + "eng" + ], + "page_number": 9, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 9 - }, - "text": "200", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "74b2342875bf0fdaa6086d177789ed46", + "text": "\uf067 Oil", "metadata": { + "languages": [ + "eng" + ], + "page_number": 9, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 9 - }, - "text": " Oil", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "6c7502293a5a468c3532c4dcf8a9b4f5", + "text": "\uf067 Coal", "metadata": { + "languages": [ + "eng" + ], + "page_number": 9, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 9 - }, - "text": " Coal", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "53c31301838bd56825ce61485f1702c2", + "text": "100", "metadata": { + "languages": [ + "eng" + ], + "page_number": 9, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 9 - }, - "text": "100", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "8a9b5dbe2172f07d8df9fb57ea127ab2", + "text": "0", "metadata": { + "languages": [ + "eng" + ], + "page_number": 9, + "filetype": "application/pdf", "data_source": { + 
"record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 9 - }, - "text": "0", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "3e70f3e4f43c12c330cff6bfd22779e9", + "text": "1974", "metadata": { + "languages": [ + "eng" + ], + "page_number": 9, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 9 - }, - "text": "1974", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "7b26b398650587f39c93052dbb119484", + "text": "1980 1985 1990 1995 2000 2005 2010", "metadata": { + "languages": [ + "eng" + ], + "page_number": 9, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 9 - }, - "text": "1980 1985 1990 1995 2000 2005 2010", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "cf3caaa1e8501d26253c6d99eb34a202", + "text": "2017", 
"metadata": { + "languages": [ + "eng" + ], + "page_number": 9, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 9 - }, - "text": "2017", - "type": "UncategorizedText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "cdca0e6b7e0fcb2db2a5d728897529e0", + "text": "Figure 6. The lasting decarbonization of French electricity and nuclear\u2019s ability to meet growing demand x", "metadata": { + "languages": [ + "eng" + ], + "page_number": 9, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 9 - }, - "text": "Figure 6. The lasting decarbonization of French electricity and nuclear’s ability to meet growing demand x", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "42bc7429f9551ba08f3a3f217ff6c56d", + "text": "The incredible energy density of uranium means that just a few kilos is all that is required to provide one person with enough power for a lifetime. Uranium is abundant and can be found in many parts of the world, as well as in seawater. Furthermore, spent nuclear fuel is well managed and can in most cases be recycled to produce even more power. 
By using nuclear energy, countries are able to take charge of their own destinies by decreasing their reliance on imported energy \u2013 enhanced independence and security in uncertain times.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 9, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 9 - }, - "text": "The incredible energy density of uranium means that just a few kilos is all that is required to provide one person with enough power for a lifetime. Uranium is abundant and can be found in many parts of the world, as well as in seawater. Furthermore, spent nuclear fuel is well managed and can in most cases be recycled to produce even more power. 
By using nuclear energy, countries are able to take charge of their own destinies by decreasing their reliance on imported energy – enhanced independence and security in uncertain times.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "b45546a69181abaeab5486d141847a69", + "text": "One fuel pellet contains as much energy as a tonne of coal", "metadata": { + "languages": [ + "eng" + ], + "page_number": 9, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 9 - }, - "text": "One fuel pellet contains as much energy as a tonne of coal", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "cd5ed9d69c6d3aa57c6376576a0e093a", + "text": "Unlike other power sources, nuclear energy helps us reduce our total footprint, going beyond just the environment. When accounting for factors such as cost (e.g. 
fuel and construction costs), carbon (lifecycle greenhouse gas emissions), water and land footprints, nuclear is far ahead of all other energy generators.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 9, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 9 - }, - "text": "Unlike other power sources, nuclear energy helps us reduce our total footprint, going beyond just the environment. When accounting for factors such as cost (e.g. fuel and construction costs), carbon (lifecycle greenhouse gas emissions), water and land footprints, nuclear is far ahead of all other energy generators.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "e322de7c08c9e758347946b72ff05707", + "text": "Nuclear energy offers a multitude of services beyond just electricity. With nuclear, we can decarbonize the way we heat our homes, provide process heat for industry, and ensure access to clean water. 
As freshwater supplies come under increasing pressure worldwide, nuclear reactors can provide desalination, ensuring a reliable flow of fresh drinking water in areas where it is scarce.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 9, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 9 - }, - "text": "Nuclear energy offers a multitude of services beyond just electricity. With nuclear, we can decarbonize the way we heat our homes, provide process heat for industry, and ensure access to clean water. As freshwater supplies come under increasing pressure worldwide, nuclear reactors can provide desalination, ensuring a reliable flow of fresh drinking water in areas where it is scarce.", - "type": "NarrativeText" + ] + } + } }, { + "type": "Footer", "element_id": "4deaf26b09525636ee093d4043fb3a37", + "text": "7", "metadata": { + "languages": [ + "eng" + ], + "page_number": 9, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 9 - }, - "text": "7", - "type": "Footer" + ] + } + } }, { + "type": "Footer", "element_id": "d484deba727a0cb854f93482219a24df", + "text": "8", "metadata": { + "languages": [ + "eng" + ], + "page_number": 10, + "filetype": "application/pdf", 
"data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 10 - }, - "text": "8", - "type": "Footer" + ] + } + } }, { + "type": "NarrativeText", "element_id": "f11771cc97b73e3ec86beb7752b20371", + "text": "Nuclear energy can be relied upon to power the new mobility revolution taking place. Every day, we use almost 20 million barrels of oil to power our vehicles. By swapping to an electric or hydrogen-powered transport fleet \u2013 all powered by the atom \u2013 we are able to address one of the key challenges to a sustainable economy.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 10, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 10 - }, - "text": "Nuclear energy can be relied upon to power the new mobility revolution taking place. Every day, we use almost 20 million barrels of oil to power our vehicles. 
By swapping to an electric or hydrogen-powered transport fleet – all powered by the atom – we are able to address one of the key challenges to a sustainable economy.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "945c8c5aeca9696e2a3c0a9b2de0e1aa", + "text": "We cannot afford to wait \u2013 we need every part of the puzzle to contribute towards solving some of the greatest challenges faced by humankind in a very long time. The impacts of climate change will hit the poorest and most vulnerable first, and failing to act will have significant humanitarian consequences.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 10, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 10 - }, - "text": "We cannot afford to wait – we need every part of the puzzle to contribute towards solving some of the greatest challenges faced by humankind in a very long time. The impacts of climate change will hit the poorest and most vulnerable first, and failing to act will have significant humanitarian consequences.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "b79b5b8e937c1a714d3286c6d70881aa", + "text": "Nuclear power is the silent giant of today\u2019s energy system \u2013 it runs quietly in the background, capable of delivering immense amounts of power, regardless of weather or season, allowing us to focus on everything else in life. It is a technology that is available now, and can be expanded quickly across the world to help us solve some of the most defining challenges we face. 
Nuclear energy holds the potential to herald a new, cleaner and truly sustainable world \u2013 enabling us to pass on a cleaner planet to our children.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 10, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 10 - }, - "text": "Nuclear power is the silent giant of today’s energy system – it runs quietly in the background, capable of delivering immense amounts of power, regardless of weather or season, allowing us to focus on everything else in life. It is a technology that is available now, and can be expanded quickly across the world to help us solve some of the most defining challenges we face. 
Nuclear energy holds the potential to herald a new, cleaner and truly sustainable world – enabling us to pass on a cleaner planet to our children.", - "type": "NarrativeText" + ] + } + } }, { + "type": "Title", "element_id": "f207455ef2611038ba28ad80e13f5cdd", + "text": "References", "metadata": { + "languages": [ + "eng" + ], + "page_number": 10, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 10 - }, - "text": "References", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "fe430b5cd43b44164de750c934e6713a", + "text": "i", "metadata": { + "languages": [ + "eng" + ], + "page_number": 10, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 10 - }, - "text": "i", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "6ee02a6302fc8b1c1c842f4c4bbec00f", + "text": "ii", "metadata": { + "languages": [ + "eng" + ], + "page_number": 10, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 10 - }, - "text": "ii", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "4af12ee92f2fdbe8b22915d32267a1f0", + "text": "iii", "metadata": { + "languages": [ + "eng" + ], + "page_number": 10, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 10 - }, - "text": "iii", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "2a5a8e279e10b26a94c4b47c2f4a5b8d", + "text": "iv", "metadata": { + "languages": [ + "eng" + ], + "page_number": 10, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 10 - }, - "text": "iv", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "9f630ca850fcb5d12d0b4be310f30eb1", + "text": "v", "metadata": { + "languages": [ + "eng" + ], + "page_number": 10, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 10 - }, - "text": "v", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "1c0babe4243d83993f9ed327d56a3da6", + "text": "vi", "metadata": { + "languages": [ + "eng" + ], + "page_number": 10, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 10 - }, - "text": "vi", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "d178220bc9e4f6eb51a249279b3727bb", + "text": "vii", "metadata": { + "languages": [ + "eng" + ], + "page_number": 10, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 10 - }, - "text": "vii", - "type": "Title" + ] + } + } }, { + "type": "NarrativeText", "element_id": "812f4b58d6834212cb046f7efec45e76", + "text": "International Energy Agency (2018), World Energy Outlook 2018. 
Data accessed from https://www.iea.org/weo/ \u2013 Based on the New Policies Scenario, which incorporates existing energy policies as well as an assessment of the results likely to stem from the implementation of announced policy intentions \u2013 with visual modification by World Nuclear Association. International Energy Agency (n.d.), Statistics. Accessed from: https://www.iea.org/statistics/?country=WORLD&year=2016&category=Electricity&indicator=ElecGenByFuel&mode =chart&dataTable=ELECTRICITYANDHEAT \u2013 with visual modifications by World Nuclear Association. International Energy Agency (2019), Nuclear Power in a Clean Energy System. Accessed from: https://www.iea.org/ publications/nuclear/ Intergovernmental Panel on Climate Change (2018), Special Report on Global Warming of 1.5 \u00b0C. Accessed from: https://www.ipcc.ch/sr15/ International Energy Agency (2019), Nuclear Power in a Clean Energy System. Accessed from: https://www.iea.org/ publications/nuclear/ International Energy Agency & OECD Nuclear Energy Agency (2015), Projected Costs of generating Electricity \u2013 2015 Edition. Accessed from: https://www.oecd-nea.org/ndd/pubs/2015/7057-proj-costs-electricity-2015.pdf International Atomic Energy Agency (2015), Technical challenges in the application and licensing of digital instrumentation and control systems in nuclear power plants. 
Accessed from: https://www-pub.iaea.org/MTCD/ Publications/PDF/P1695_web.pdf", "metadata": { + "languages": [ + "eng" + ], + "page_number": 10, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 10 - }, - "text": "International Energy Agency (2018), World Energy Outlook 2018. Data accessed from https://www.iea.org/weo/ – Based on the New Policies Scenario, which incorporates existing energy policies as well as an assessment of the results likely to stem from the implementation of announced policy intentions – with visual modification by World Nuclear Association. International Energy Agency (n.d.), Statistics. Accessed from: https://www.iea.org/statistics/?country=WORLD&year=2016&category=Electricity&indicator=ElecGenByFuel&mode =chart&dataTable=ELECTRICITYANDHEAT – with visual modifications by World Nuclear Association. International Energy Agency (2019), Nuclear Power in a Clean Energy System. Accessed from: https://www.iea.org/ publications/nuclear/ Intergovernmental Panel on Climate Change (2018), Special Report on Global Warming of 1.5 °C. Accessed from: https://www.ipcc.ch/sr15/ International Energy Agency (2019), Nuclear Power in a Clean Energy System. Accessed from: https://www.iea.org/ publications/nuclear/ International Energy Agency & OECD Nuclear Energy Agency (2015), Projected Costs of generating Electricity – 2015 Edition. 
Accessed from: https://www.oecd-nea.org/ndd/pubs/2015/7057-proj-costs-electricity-2015.pdf International Atomic Energy Agency (2015), Technical challenges in the application and licensing of digital instrumentation and control systems in nuclear power plants. Accessed from: https://www-pub.iaea.org/MTCD/ Publications/PDF/P1695_web.pdf", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "3608ac8259f31d3e7aa2bc77b4a7ed4f", + "text": "viii Paul-Scherrer Institute. Data for nuclear accidents modified to reflect UNSCEAR findings/recommendations (2012)", "metadata": { + "languages": [ + "eng" + ], + "page_number": 10, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 10 - }, - "text": "viii Paul-Scherrer Institute. 
Data for nuclear accidents modified to reflect UNSCEAR findings/recommendations (2012)", - "type": "NarrativeText" + ] + } + } }, { + "type": "Title", "element_id": "3d47050d17c0e49db1ccbd2e728c37ef", + "text": "ix", "metadata": { + "languages": [ + "eng" + ], + "page_number": 10, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 10 - }, - "text": "ix", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "cf4326fca54742a90c7eb29712ff1d18", + "text": "x", "metadata": { + "languages": [ + "eng" + ], + "page_number": 10, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 10 - }, - "text": "x", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "ece517178222ea4948abdd8eca9d0ac7", + "text": "and NRC SOARCA study 2015 International Energy Agency (2018), Electricity Information 2018 https://webstore.iea.org/electricity-information-2018-overview Ibid.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 10, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], 
- "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 10 - }, - "text": "and NRC SOARCA study 2015 International Energy Agency (2018), Electricity Information 2018 https://webstore.iea.org/electricity-information-2018-overview Ibid.", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "6aff8c56dd4618e7ca7830f2209e442e", + "text": "Photo credits: Front cover: Mike Baird; page 2: Vattenfall; page 4: Getty Images; page 5: Adobe Stock; page 6: Rosatom; page 8: Dean Calma, IAEA; page 10: Kazatomprom; page 11: EDF.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 10, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 10 - }, - "text": "Photo credits: Front cover: Mike Baird; page 2: Vattenfall; page 4: Getty Images; page 5: Adobe Stock; page 6: Rosatom; page 8: Dean Calma, IAEA; page 10: Kazatomprom; page 11: EDF.", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "a839ef713de31c7a993e697b53db62f1", + "text": "World Nuclear Association Tower House 10 Southampton Street London WC2E 7HA United Kingdom", "metadata": { + "languages": [ + "eng" + ], + "page_number": 12, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 12 - }, - "text": "World Nuclear Association Tower House 10 Southampton Street London WC2E 7HA United Kingdom", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Header", "element_id": "7ef54c757ae752a9cc65e9908abad79d", + "text": "+44 (0)20 7451 1520 www.world-nuclear.org info@world-nuclear.org", "metadata": { + "languages": [ + "eng" + ], + "page_number": 12, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 12 - }, - "text": "+44 (0)20 7451 1520 www.world-nuclear.org info@world-nuclear.org", - "type": "Header" + ] + } + } }, { + "type": "NarrativeText", "element_id": "81ad08c457e3d3dfc6e16eaf515f8529", + "text": "World Nuclear Association is the international organization that represents the global nuclear industry. 
Its mission is to promote a wider understanding of nuclear energy among key international influencers by producing authoritative information, developing common industry positions, and contributing to the energy debate.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 12, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 12 - }, - "text": "World Nuclear Association is the international organization that represents the global nuclear industry. Its mission is to promote a wider understanding of nuclear energy among key international influencers by producing authoritative information, developing common industry positions, and contributing to the energy debate.", - "type": "NarrativeText" + ] + } + } }, { + "type": "Footer", "element_id": "92185da56a1412bd97a6c5829da27a3c", + "text": "The Silent Giant \u00a9 2019 World Nuclear Association. Registered in England and Wales, company number 01215741", "metadata": { + "languages": [ + "eng" + ], + "page_number": 12, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/Silent-Giant-(1).pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/Silent-Giant-(1).pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 12 - }, - "text": "The Silent Giant © 2019 World Nuclear Association. 
Registered in England and Wales, company number 01215741", - "type": "Footer" + ] + } + } } ] \ No newline at end of file diff --git a/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/s3/small-pdf-set/page-with-formula.pdf.json b/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/s3/page-with-formula.json similarity index 60% rename from test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/s3/small-pdf-set/page-with-formula.pdf.json rename to test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/s3/page-with-formula.json index 0c3e3deb13..81b6237a77 100644 --- a/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/s3/small-pdf-set/page-with-formula.pdf.json +++ b/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/s3/page-with-formula.json @@ -1,560 +1,612 @@ [ { + "type": "NarrativeText", "element_id": "7581b3e14a56c276896da707704c221e", + "text": "output values. These are concatenated and once again projected, resulting in the final values, as depicted in Figure 2.", "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/page-with-formula.pdf" - }, - "filetype": "application/pdf", "languages": [ "eng" ], "links": [ { - "start_index": 15, "text": "Theseareconcatenatedandonceagainprojected , resultinginthefinalvalues , depictedinFigure2", - "url": "figure.2" + "url": "figure.2", + "start_index": 15 } ], - "page_number": 1 - }, - "text": "output values. 
These are concatenated and once again projected, resulting in the final values, as depicted in Figure 2.", - "type": "NarrativeText" + "page_number": 1, + "filetype": "application/pdf", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/page-with-formula.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } }, { + "type": "NarrativeText", "element_id": "5f0b9e258d134a12434aaa080638e9de", + "text": "Multi-head attention allows the model to jointly attend to information from different representation subspaces at different positions. With a single attention head, averaging inhibits this.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/page-with-formula.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/page-with-formula.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "Multi-head attention allows the model to jointly attend to information from different representation subspaces at different positions. 
With a single attention head, averaging inhibits this.", - "type": "NarrativeText" + ] + } + } }, { + "type": "Title", "element_id": "655d84f90463e6c0968a8c2f56e7a109", + "text": "MultiHead(Q, K, V ) = Concat(head1, ..., headh)W O", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/page-with-formula.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/page-with-formula.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "MultiHead(Q, K, V ) = Concat(head1, ..., headh)W O", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "7f8ee935b04434c7d7a9f02afdfb874c", + "text": "where headi = Attention(QW Q", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/page-with-formula.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/page-with-formula.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "where headi = Attention(QW Q", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "dbcefaac4efed4b17f31c2aad76133b3", + "text": "i , KW K i", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/page-with-formula.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/page-with-formula.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "i , KW K i", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "f3d278abb390004af2190c494d09eb63", + "text": ", V W V", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/page-with-formula.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/page-with-formula.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": ", V W V", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "b8dde388acb2e25a19efd1d35c534ed1", + "text": "i )", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/page-with-formula.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/page-with-formula.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "i )", - "type": "Title" + ] + } + } }, { + "type": "NarrativeText", "element_id": "12a3f945fcc10083ac444113b64de288", + "text": "Where the projections are parameter matrices W Q and W O \u2208 Rhdv\u00d7dmodel.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/page-with-formula.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/page-with-formula.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "Where the projections are parameter matrices W Q and W O ∈ Rhdv×dmodel.", - "type": "NarrativeText" + ] + } + } }, { + "type": "Title", "element_id": "31062bb692413a1898a5745694409491", + "text": "i \u2208 Rdmodel\u00d7dk , W K", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/page-with-formula.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/page-with-formula.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "i ∈ Rdmodel×dk , W K", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "d24a75bde79e2d7eccf265ed90ede55c", + "text": "i \u2208 Rdmodel\u00d7dk , W V", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/page-with-formula.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/page-with-formula.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "i ∈ Rdmodel×dk , W V", - "type": "Title" + ] + } + } }, { + "type": "NarrativeText", "element_id": "c6bf4252ac3ff03dbb2b1366178203fe", + "text": "i 
\u2208 Rdmodel\u00d7dv", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/page-with-formula.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/page-with-formula.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "i ∈ Rdmodel×dv", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "1bb13fc3afd437fd2f624fa70f647d7d", + "text": "In this work we employ h = 8 parallel attention layers, or heads. For each of these we use dk = dv = dmodel/h = 64. Due to the reduced dimension of each head, the total computational cost is similar to that of single-head attention with full dimensionality.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/page-with-formula.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/page-with-formula.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "In this work we employ h = 8 parallel attention layers, or heads. For each of these we use dk = dv = dmodel/h = 64. 
Due to the reduced dimension of each head, the total computational cost is similar to that of single-head attention with full dimensionality.", - "type": "NarrativeText" + ] + } + } }, { + "type": "Title", "element_id": "c10aaa7bd4e8b87c5c960afe5d20ff92", + "text": "3.2.3 Applications of Attention in our Model", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/page-with-formula.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/page-with-formula.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "3.2.3 Applications of Attention in our Model", - "type": "Title" + ] + } + } }, { + "type": "NarrativeText", "element_id": "8d70b247879d0490c4c584b9f862e825", + "text": "The Transformer uses multi-head attention in three different ways:", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/page-with-formula.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/page-with-formula.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "The Transformer uses multi-head attention in three different ways:", - "type": "NarrativeText" + ] + } + } }, { + "type": "ListItem", "element_id": "3f08951f4249885ffff0a3867248179b", + "text": "In \"encoder-decoder attention\" layers, the queries come from the previous decoder layer, and the memory keys and values come from the output of the encoder. 
This allows every position in the decoder to attend over all positions in the input sequence. This mimics the typical encoder-decoder attention mechanisms in sequence-to-sequence models such as [38, 2, 9].", "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/page-with-formula.pdf" - }, - "filetype": "application/pdf", "languages": [ "eng" ], "links": [ { - "start_index": 354, "text": "38", - "url": "cite.wu2016google" + "url": "cite.wu2016google", + "start_index": 354 }, { - "start_index": 358, "text": "2", - "url": "cite.bahdanau2014neural" + "url": "cite.bahdanau2014neural", + "start_index": 358 }, { - "start_index": 361, "text": "9", - "url": "cite.JonasFaceNet2017" + "url": "cite.JonasFaceNet2017", + "start_index": 361 } ], - "page_number": 1 - }, - "text": "In \"encoder-decoder attention\" layers, the queries come from the previous decoder layer, and the memory keys and values come from the output of the encoder. This allows every position in the decoder to attend over all positions in the input sequence. This mimics the typical encoder-decoder attention mechanisms in sequence-to-sequence models such as [38, 2, 9].", - "type": "ListItem" - }, - { - "element_id": "94df2c25f01f24e13da6e367e093efaa", - "metadata": { + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/page-with-formula.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/page-with-formula.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "The encoder contains self-attention layers. 
In a self-attention layer all of the keys, values and queries come from the same place, in this case, the output of the previous layer in the encoder. Each position in the encoder can attend to all positions in the previous layer of the encoder.", - "type": "ListItem" + ] + } + } }, { - "element_id": "b25ca1bd5ad13e2e046701e74ff38e40", + "type": "ListItem", + "element_id": "94df2c25f01f24e13da6e367e093efaa", + "text": "The encoder contains self-attention layers. In a self-attention layer all of the keys, values and queries come from the same place, in this case, the output of the previous layer in the encoder. Each position in the encoder can attend to all positions in the previous layer of the encoder.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/page-with-formula.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/page-with-formula.pdf" - }, - "filetype": "application/pdf", + ] + } + } + }, + { + "type": "ListItem", + "element_id": "b25ca1bd5ad13e2e046701e74ff38e40", + "text": "Similarly, self-attention layers in the decoder allow each position in the decoder to attend to all positions in the decoder up to and including that position. We need to prevent leftward information flow in the decoder to preserve the auto-regressive property. We implement this inside of scaled dot-product attention by masking out (setting to \u2212\u221e) all values in the input of the softmax which correspond to illegal connections. See Figure 2.", + "metadata": { "languages": [ "eng" ], "links": [ { - "start_index": 347, - "text": "−∞) ofthesoftmaxwhichcorrespondtoillegalconnections . SeeFigure2", - "url": "figure.2" + "text": "\u2212\u221e) ofthesoftmaxwhichcorrespondtoillegalconnections . 
SeeFigure2", + "url": "figure.2", + "start_index": 347 } ], - "page_number": 1 - }, - "text": "Similarly, self-attention layers in the decoder allow each position in the decoder to attend to all positions in the decoder up to and including that position. We need to prevent leftward information flow in the decoder to preserve the auto-regressive property. We implement this inside of scaled dot-product attention by masking out (setting to −∞) all values in the input of the softmax which correspond to illegal connections. See Figure 2.", - "type": "ListItem" + "page_number": 1, + "filetype": "application/pdf", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/page-with-formula.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } }, { + "type": "Title", "element_id": "49783f545b113482c782a90e6f610bb8", + "text": "3.3 Position-wise Feed-Forward Networks", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/page-with-formula.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/page-with-formula.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "3.3 Position-wise Feed-Forward Networks", - "type": "Title" + ] + } + } }, { + "type": "NarrativeText", "element_id": "06c9ebef52b0fdb1bcff0058cc78471b", + "text": "In addition to attention sub-layers, each of the layers in our encoder and decoder contains a fully connected feed-forward network, which is applied to each position separately and identically. 
This consists of two linear transformations with a ReLU activation in between.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/page-with-formula.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/page-with-formula.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "In addition to attention sub-layers, each of the layers in our encoder and decoder contains a fully connected feed-forward network, which is applied to each position separately and identically. This consists of two linear transformations with a ReLU activation in between.", - "type": "NarrativeText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "4535f534fbdf2aaf6859c7fe572aba22", + "text": "FFN(x) = max(0, xW1 + b1)W2 + b2", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/page-with-formula.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/page-with-formula.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "FFN(x) = max(0, xW1 + b1)W2 + b2", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "93809163645d98d16c899470797bb3bc", + "text": "(2)", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/page-with-formula.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/page-with-formula.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "(2)", - "type": "UncategorizedText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "3852fd6db06da49daea4a417344ab7bc", + "text": "While the linear transformations are the same across different positions, they use different parameters from layer to layer. Another way of describing this is as two convolutions with kernel size 1. The dimensionality of input and output is dmodel = 512, and the inner-layer has dimensionality df f = 2048.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/page-with-formula.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/page-with-formula.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "While the linear transformations are the same across different positions, they use different parameters from layer to layer. Another way of describing this is as two convolutions with kernel size 1. 
The dimensionality of input and output is dmodel = 512, and the inner-layer has dimensionality df f = 2048.", - "type": "NarrativeText" + ] + } + } }, { + "type": "Title", "element_id": "f1bd60eb761bba59cdbdcd3f00ecd10e", + "text": "3.4 Embeddings and Softmax", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/page-with-formula.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/page-with-formula.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "3.4 Embeddings and Softmax", - "type": "Title" + ] + } + } }, { + "type": "NarrativeText", "element_id": "65bfc591bfe9b5b267cdba5557b01313", + "text": "Similarly to other sequence transduction models, we use learned embeddings to convert the input tokens and output tokens to vectors of dimension dmodel. We also use the usual learned linear transfor- mation and softmax function to convert the decoder output to predicted next-token probabilities. In our model, we share the same weight matrix between the two embedding layers and the pre-softmax dmodel. linear transformation, similar to [30]. 
In the embedding layers, we multiply those weights by", "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/page-with-formula.pdf" - }, - "filetype": "application/pdf", "languages": [ "eng" ], "links": [ { - "start_index": 439, "text": "30", - "url": "cite.press2016using" + "url": "cite.press2016using", + "start_index": 439 } ], - "page_number": 1 - }, - "text": "Similarly to other sequence transduction models, we use learned embeddings to convert the input tokens and output tokens to vectors of dimension dmodel. We also use the usual learned linear transfor- mation and softmax function to convert the decoder output to predicted next-token probabilities. In our model, we share the same weight matrix between the two embedding layers and the pre-softmax dmodel. linear transformation, similar to [30]. In the embedding layers, we multiply those weights by", - "type": "NarrativeText" + "page_number": 1, + "filetype": "application/pdf", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/page-with-formula.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } }, { + "type": "UncategorizedText", "element_id": "ba39d029c21581fd00f99ef5a9498850", + "text": "\u221a", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/page-with-formula.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/page-with-formula.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "√", - "type": "UncategorizedText" + ] + } + } }, { + 
"type": "Footer", "element_id": "8db3e9e959fa5b11744941c1a331e208", + "text": "5", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/page-with-formula.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/page-with-formula.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "5", - "type": "Footer" + ] + } + } } ] \ No newline at end of file diff --git a/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/s3/small-pdf-set/recalibrating-risk-report.pdf.json b/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/s3/recalibrating-risk-report.json similarity index 57% rename from test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/s3/small-pdf-set/recalibrating-risk-report.pdf.json rename to test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/s3/recalibrating-risk-report.json index 1c05f8c9e7..c776e2fe92 100644 --- a/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/s3/small-pdf-set/recalibrating-risk-report.pdf.json +++ b/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/s3/recalibrating-risk-report.json @@ -1,2902 +1,3192 @@ [ { + "type": "NarrativeText", "element_id": "e9b0ec3bcd6b7e824a7ef6a4ff841005", + "text": "Recalibrating risk", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "Recalibrating risk", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "9a3565f08ed263473ed92d03f337d9de", + "text": "Putting nuclear risk in context and perspective", "metadata": { + "languages": [ + "eng" + ], + "page_number": 1, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 1 - }, - "text": "Putting nuclear risk in context and perspective", - "type": "NarrativeText" + ] + } + } }, { + "type": "Title", "element_id": "f71e85ddcaf37c2df39af496a16c23ab", + "text": "\u00a9 2021 World Nuclear Association", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "© 2021 World Nuclear Association", - "type": "Title" + ] + } + } }, { + "type": "NarrativeText", "element_id": "24a8636658e8d7b70af1f0536d494159", + "text": "Registered in England and Wales, company number 01215741. 
This report represents the views of individual experts, but does not necessarily represent those of any of the World Nuclear Association\u2019s individual member organizations.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 2, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 2 - }, - "text": "Registered in England and Wales, company number 01215741. This report represents the views of individual experts, but does not necessarily represent those of any of the World Nuclear Association’s individual member organizations.", - "type": "NarrativeText" + ] + } + } }, { + "type": "Title", "element_id": "a301376c55ac727652cf954ec5e913d6", + "text": "Executive Summary", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "Executive Summary", - "type": "Title" + ] + } + } }, { + "type": "NarrativeText", "element_id": "5b2ec7692e73027141163ac9031623f5", + "text": "Nuclear energy is crucial to meeting the world\u2019s ever-increasing demand for energy, thanks to its ability to supply affordable, reliable, and sustainable electricity and heat. 
Despite the many benefits of nuclear energy, its deployment is hindered in some parts of the world due to long-standing misconceptions about its risks. Even with its safety record \u2013 unmatched by any other energy source \u2013 the perception of nuclear power as uniquely dangerous endures.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "Nuclear energy is crucial to meeting the world’s ever-increasing demand for energy, thanks to its ability to supply affordable, reliable, and sustainable electricity and heat. Despite the many benefits of nuclear energy, its deployment is hindered in some parts of the world due to long-standing misconceptions about its risks. Even with its safety record – unmatched by any other energy source – the perception of nuclear power as uniquely dangerous endures.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "cb2f79944be60fd4447f31653abc1c33", + "text": "This is reflected in the regulatory burden placed on the nuclear industry, which is geared towards an \u201cas low as possible\u201d approach, demanding radiation levels to be far below the levels where health effects have been observed (and in many cases below natural background radiation). This has resulted in higher costs, without delivering any additional health benefits, and has resulted in policymakers choosing other, more risky energy sources. 
More often than not, those alternative energy sources have been fossil fuels, greatly exacerbating the well-known risks posed by air pollution and climate change.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "This is reflected in the regulatory burden placed on the nuclear industry, which is geared towards an “as low as possible” approach, demanding radiation levels to be far below the levels where health effects have been observed (and in many cases below natural background radiation). This has resulted in higher costs, without delivering any additional health benefits, and has resulted in policymakers choosing other, more risky energy sources. More often than not, those alternative energy sources have been fossil fuels, greatly exacerbating the well-known risks posed by air pollution and climate change.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "edd5a77bbaec194649c3e909359778dc", + "text": "Expanding the use of nuclear energy is essential for solving some of the biggest challenges facing humanity. Nuclear power has already played a major role in avoiding the emission of air pollutants and greenhouse gases, a role that will have to be greatly expanded in the future to ensure global energy supplies are decarbonized by 2050. 
Nuclear energy will also play a major part in ensuring that the transition to a low-carbon future is done in an equitable fashion, providing people across the world with a high-powered and sustainable future.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "Expanding the use of nuclear energy is essential for solving some of the biggest challenges facing humanity. Nuclear power has already played a major role in avoiding the emission of air pollutants and greenhouse gases, a role that will have to be greatly expanded in the future to ensure global energy supplies are decarbonized by 2050. Nuclear energy will also play a major part in ensuring that the transition to a low-carbon future is done in an equitable fashion, providing people across the world with a high-powered and sustainable future.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "0176a09ce730c661b03e05b873f406e0", + "text": "In order to fully unlock the potential of the atom, it is crucial that the gap between perceived and actual risks is addressed. 
The window of opportunity to act on climate change and other global challenges is closing fast \u2013 we must not delay increasing the contribution of nuclear energy on the grounds of myths and misconceptions.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "In order to fully unlock the potential of the atom, it is crucial that the gap between perceived and actual risks is addressed. The window of opportunity to act on climate change and other global challenges is closing fast – we must not delay increasing the contribution of nuclear energy on the grounds of myths and misconceptions.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "f6475defefd93d0b101032386c3904b9", + "text": "Therefore, World Nuclear Association calls upon policymakers and regulators to adopt an all-hazards approach, where different risks associated with energy producing technologies are placed in perspective and the appropriate context, and examined in line with the latest scientific evidence. Policymakers and regulators must ensure that their decisions regarding radiation protection do not create greater risks elsewhere. 
This include the recalibration of existing regulations regarding nuclear power and radiation, weighing the cost of regulatory measures against the societal benefits provided by nuclear energy.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "Therefore, World Nuclear Association calls upon policymakers and regulators to adopt an all-hazards approach, where different risks associated with energy producing technologies are placed in perspective and the appropriate context, and examined in line with the latest scientific evidence. Policymakers and regulators must ensure that their decisions regarding radiation protection do not create greater risks elsewhere. 
This include the recalibration of existing regulations regarding nuclear power and radiation, weighing the cost of regulatory measures against the societal benefits provided by nuclear energy.", - "type": "NarrativeText" + ] + } + } }, { + "type": "Footer", "element_id": "42992ed14b1dc9fc55c2971c1efea52c", + "text": "1", "metadata": { + "languages": [ + "eng" + ], + "page_number": 3, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 3 - }, - "text": "1", - "type": "Footer" + ] + } + } }, { + "type": "Footer", "element_id": "d7d7ca3be3dd09816ae5d0294281e8d9", + "text": "2", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "2", - "type": "Footer" + ] + } + } }, { + "type": "NarrativeText", "element_id": "ca878cdd15c9e34463e380dfcb994c2d", + "text": "Perceived versus actual risk", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 
33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "Perceived versus actual risk", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "7a6ffe00fc921772e62d72d8b36ae72d", + "text": "It is widely accepted that humans have skewed perceptions of risks, and the way we respond to them is shaped by these perceptions, rather than the actual threats posed. Approximately 1.35 millioni people die every year because of traffic accidents, in comparison with 257 aviation fatalities in 2019ii, yet more people are nervous about flying, fearing a rare deadly crash, than being in a fatal traffic accident. These numbers tell a powerful and well-established story: evaluations of risk are largely the result of emotions, rather than logic or facts. Although it is hard to recognize and accept that our perceptions may mislead us and curtail effective decision making, this is a well-established characteristic of humanity.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "It is widely accepted that humans have skewed perceptions of risks, and the way we respond to them is shaped by these perceptions, rather than the actual threats posed. 
Approximately 1.35 millioni people die every year because of traffic accidents, in comparison with 257 aviation fatalities in 2019ii, yet more people are nervous about flying, fearing a rare deadly crash, than being in a fatal traffic accident. These numbers tell a powerful and well-established story: evaluations of risk are largely the result of emotions, rather than logic or facts. Although it is hard to recognize and accept that our perceptions may mislead us and curtail effective decision making, this is a well-established characteristic of humanity.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "31ed39cf3f959ddf86d3eba65cb79a01", + "text": "Nuclear energy and the risk of radiation is one of the most extreme cases in which perceived and actual risks have diverged. The fear of radiation, whilst pre- dating the Second World War, was firmly established by the debate on the potential impacts of low-dose radiation from the fallout from nuclear weapons testing in the early years of the Cold War. Radiation in many ways became linked with the mental imagery of nuclear war, playing an important role in increasing public concern about radiation and its health effects. There is a well-established discrepancy between fact-based risk assessments and public perception of different risks. This is very much the case with nuclear power, and this is clearly highlighted in Figure 1, with laypersons ranking nuclear power as the highest risk out of 30 activities and technologies, with experts ranking nuclear as 20th. 
In many ways, popular culture\u2019s depiction of radiation has played a role in ensuring that this discrepancy has remained, be it Godzilla, The Incredible Hulk, or The Simpsons, which regularly plays on the notion of radiation from nuclear power plants causing three-eyed fish, something that has been firmly rejected as unscientific.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "Nuclear energy and the risk of radiation is one of the most extreme cases in which perceived and actual risks have diverged. The fear of radiation, whilst pre- dating the Second World War, was firmly established by the debate on the potential impacts of low-dose radiation from the fallout from nuclear weapons testing in the early years of the Cold War. Radiation in many ways became linked with the mental imagery of nuclear war, playing an important role in increasing public concern about radiation and its health effects. There is a well-established discrepancy between fact-based risk assessments and public perception of different risks. This is very much the case with nuclear power, and this is clearly highlighted in Figure 1, with laypersons ranking nuclear power as the highest risk out of 30 activities and technologies, with experts ranking nuclear as 20th. 
In many ways, popular culture’s depiction of radiation has played a role in ensuring that this discrepancy has remained, be it Godzilla, The Incredible Hulk, or The Simpsons, which regularly plays on the notion of radiation from nuclear power plants causing three-eyed fish, something that has been firmly rejected as unscientific.", - "type": "NarrativeText" + ] + } + } }, { + "type": "Title", "element_id": "a66214340855880a5393384d1363511c", + "text": "Rank Order Laypersons", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "Rank Order Laypersons", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "fb5001159174b950c8a7f4fe05fd1d91", + "text": "Experts", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "Experts", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "7224bd043ff3385128603da2f79ffe5a", + "text": "20", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + 
"record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "20", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "58db552520c02eb2af3c7aa26226d179", + "text": "Nuclear power", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "Nuclear power", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "f4f1ed7d41f9d59a7fa026e4691a44ad", + "text": "1", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "1", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "3103c9b3c68800a4c18d5277b865eacb", + "text": "1", "metadata": { + "languages": 
[ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "1", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "804fcd51ee1e5d5602fcb9473c0f102f", + "text": "Motor vehicles", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "Motor vehicles", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "1900ed258c167f8bafbd8ed1925e176d", + "text": "2", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "2", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", 
"element_id": "8e4004d681d241450ad3f98973e2139d", + "text": "4", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "4", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "9001e9360f153c1015bc8aeb9aafdb19", + "text": "3", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "3", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "fbbd25426798feb36f4a908cfbfb65b0", + "text": "Handguns", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": 
"Handguns", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "0cf1000bd6460a5dc4bb008a991e0956", + "text": "2", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "2", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "f33052b919c772581d08bd8defdc05aa", + "text": "4", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "4", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "b02c47fa996a27837d1748f5c36e3469", + "text": "Smoking", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": 
"application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "Smoking", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "16731355d00af3a4fe92791dde53dc5c", + "text": "\uf094", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "6ed16a54c46bd702b1fd409a3b68c16c", + "text": "\uf094", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "3f2e2e9fbb71b7534f8c706d1381819c", + "text": "Electric power (non-nuclear)", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "Electric power (non-nuclear)", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "e9bd35268e86ab13c036a4bc19a79b56", + "text": "9", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "9", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "b236a2a96e6662a4959116caef21d24a", + "text": "17", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "17", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "43d08e4a2b87684e2cc3767f60d248c0", + "text": "\uf094", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "d2e68eafc3a9e386d4b37d59a34c1a84", + "text": "\uf094", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "fe92e7b03dd01503ea1e016c14cb3497", + "text": "22", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "22", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "c5d62140ba375d471679e3a90f673d5c", + "text": "7", "metadata": { + "languages": [ + "eng" + ], + 
"page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "7", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "d0282b64f27dd6bacbd0f8c41f0e25ec", + "text": "X-rays", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "X-rays", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "743ac481be02f3121d1bd00818d68b94", + "text": "\uf094", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": 
"0d66265a137055110d701bcf66bc91e9", + "text": "\uf094", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "2da091576169a29be7c95436e7004ec4", + "text": "Vaccinations", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "Vaccinations", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "80918f5dcc4b0297cfa957e7cb4f648b", + "text": "30", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "30", - 
"type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "bc1c523a0f95d819a9a49355bfb3bdb9", + "text": "25", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "25", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "90c3828ebbbb997767a82dc417bcb207", + "text": "Figure 1. Ordering of perceived risks for 30 activities and technologies1,iii", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "Figure 1. Ordering of perceived risks for 30 activities and technologies1,iii", - "type": "Title" + ] + } + } }, { + "type": "NarrativeText", "element_id": "f40f3b2180df5d7cc6f1a80864c3fbb4", + "text": "In reality, radiation is a natural part of life; indeed, we are all exposed to radiation every day, on average receiving 2-3 millisieverts (mSv) per year. Most of this radiation is naturally occurring, with radon gas from the ground being the main source of exposure. 
The nuclear industry is responsible for a very small part of radiation exposure to the public, as seen in Figure 2. To put this into perspective, eating 10 bananas or two Brazil nuts results in the same radiation dose as living nearby a nuclear power plant for a year. Humans are also naturally radioactive, and the radiation dose from sleeping next to someone else each night for a year is ten times higher than the exposure from living nearby a nuclear power plant for the same time span.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "In reality, radiation is a natural part of life; indeed, we are all exposed to radiation every day, on average receiving 2-3 millisieverts (mSv) per year. Most of this radiation is naturally occurring, with radon gas from the ground being the main source of exposure. The nuclear industry is responsible for a very small part of radiation exposure to the public, as seen in Figure 2. To put this into perspective, eating 10 bananas or two Brazil nuts results in the same radiation dose as living nearby a nuclear power plant for a year. 
Humans are also naturally radioactive, and the radiation dose from sleeping next to someone else each night for a year is ten times higher than the exposure from living nearby a nuclear power plant for the same time span.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "a4f97839e70a6503b73fa4fc458eded7", + "text": "In fact, scientific consensus is that when it comes to preventing exposure to radiation, nuclear power is much better than other electricity generators. A 2016 reportiii from the United Nations Scientific Committee on the Effects of Atomic Radiation (UNSCEAR) found that coal-generated electricity is responsible for more than half of the total global radiation exposure arising from electricity generation, while nuclear power contributed less than a fifth. Coal miners received high occupational exposure and workers in solar and wind farms received the highest occupational exposure associated with plant construction for the same amount of installed capacity.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "In fact, scientific consensus is that when it comes to preventing exposure to radiation, nuclear power is much better than other electricity generators. 
A 2016 reportiii from the United Nations Scientific Committee on the Effects of Atomic Radiation (UNSCEAR) found that coal-generated electricity is responsible for more than half of the total global radiation exposure arising from electricity generation, while nuclear power contributed less than a fifth. Coal miners received high occupational exposure and workers in solar and wind farms received the highest occupational exposure associated with plant construction for the same amount of installed capacity.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "fdfcbf5051b3d20071a825a6589fd164", + "text": "1 The original study was published in 1978, but its findings have been confirmed by numerous studies since.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 4, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "1 The original study was published in 1978, but its findings have been confirmed by numerous studies since.", - "type": "NarrativeText" + ] + } + } }, { + "type": "Title", "element_id": "cc6caddce6134f3ab941cd1034baa832", + "text": "Natural", "metadata": { + "languages": [ + "eng" + ], + "page_number": 5, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - 
"filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "Natural", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "7da4c5414e76d77d311e3a0640cb5a5a", + "text": "Artificial", "metadata": { + "languages": [ + "eng" + ], + "page_number": 5, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "Artificial", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "84a6145b34cad14a69230eaadfe330b3", + "text": "\uf06e 48% Radon \uf06e 14% Buildings & soil \uf06e 12% Food & water \uf06e 10% Cosmic \uf06e 4% Thoron", "metadata": { + "languages": [ + "eng" + ], + "page_number": 5, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": " 48% Radon  14% Buildings & soil  12% Food & water  10% Cosmic  4% Thoron", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "3c83f46bd7e4ebcb59aaf37f4f59a8d3", + "text": "\uf06e 11% Medicine \uf06e 0.4% \uf06e 0.4% Miscellaneous \uf06e 0.2% Occupational \uf06e 0.04% Nuclear discharges", "metadata": { + "languages": [ + "eng" + ], + "page_number": 5, + "filetype": 
"application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": " 11% Medicine  0.4%  0.4% Miscellaneous  0.2% Occupational  0.04% Nuclear discharges", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "44b025d5a49899fb340a69517c0fb9d7", + "text": "Fallout", "metadata": { + "languages": [ + "eng" + ], + "page_number": 5, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "Fallout", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "0475d0402d28b7f957a889e7355472c0", + "text": "Figure 2. 
Global average exposure from different sources of radiation", "metadata": { + "languages": [ + "eng" + ], + "page_number": 5, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "Figure 2. Global average exposure from different sources of radiation", - "type": "Title" + ] + } + } }, { + "type": "NarrativeText", "element_id": "3f3508476c710060dfb8431d7600c2ec", + "text": "Fossil fuels \u2013 currently accounting for around 81% of total energy supplyiv \u2013 cause significant levels of emissions in terms of both greenhouse gases and air pollutants. Despite the serious and ongoing health and environmental harms caused by air pollution, it is often considered to be an inevitable consequence of economic development. Air pollution\u2019s contribution to the burden of disease is profound, with an estimated 8.7 million people dying worldwide prematurely in 2018 alonev,vi. 
Despite this, it fails to induce the same fears and anxieties in people as nuclear energy does.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 5, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "Fossil fuels – currently accounting for around 81% of total energy supplyiv – cause significant levels of emissions in terms of both greenhouse gases and air pollutants. Despite the serious and ongoing health and environmental harms caused by air pollution, it is often considered to be an inevitable consequence of economic development. Air pollution’s contribution to the burden of disease is profound, with an estimated 8.7 million people dying worldwide prematurely in 2018 alonev,vi. Despite this, it fails to induce the same fears and anxieties in people as nuclear energy does.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "c525bff677c1e20aabffcbeb462e23d8", + "text": "In terms of accidents, hydropower is the deadliest electricity generator, mostly due to collapsing dams and the consequences of flooding. The Banqiao Dam failure in 1975 led to at least 26,000 people drowning, and as many as 150,000 deaths resulting from the secondary effects of the accident. 
In comparison, radiation exposure following Chernobyl caused 54 deaths2, while no casualties due to radiation are likely to occur from the accident at Fukushima Daiichi.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 5, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "In terms of accidents, hydropower is the deadliest electricity generator, mostly due to collapsing dams and the consequences of flooding. The Banqiao Dam failure in 1975 led to at least 26,000 people drowning, and as many as 150,000 deaths resulting from the secondary effects of the accident. 
In comparison, radiation exposure following Chernobyl caused 54 deaths2, while no casualties due to radiation are likely to occur from the accident at Fukushima Daiichi.", - "type": "NarrativeText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "6cdd2346b4496f1af2b33ed75c3410f1", + "text": "25", "metadata": { + "languages": [ + "eng" + ], + "page_number": 5, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "25", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "9715544c9914ef7de8052bc92f04cd58", + "text": "24.6", "metadata": { + "languages": [ + "eng" + ], + "page_number": 5, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "24.6", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "3559d07dc2379e2f4c1f4653add91028", + "text": "20", "metadata": { + "languages": [ + "eng" + ], + "page_number": 5, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { 
"mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "20", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "75e15b7448c3c129b9ca24698c044163", + "text": "18.4", "metadata": { + "languages": [ + "eng" + ], + "page_number": 5, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "18.4", - "type": "UncategorizedText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "13969ab3a15bf32df2fe13c9a40cc40e", + "text": "r a e y", "metadata": { + "languages": [ + "eng" + ], + "page_number": 5, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "r a e y", - "type": "NarrativeText" + ] + } + } }, { + "type": "Title", "element_id": "4b6921413fc54cc610daf6b297438229", + "text": "e", "metadata": { + "languages": [ + "eng" + ], + "page_number": 5, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "e", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "aa5644b17f26b1839e445dd290293492", + "text": "15", "metadata": { + "languages": [ + "eng" + ], + "page_number": 5, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "15", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "cebce4658d702821d610f65bd926be90", + "text": "W T", "metadata": { + "languages": [ + "eng" + ], + "page_number": 5, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "W T", - "type": "Title" + ] + } + } }, { + "type": "NarrativeText", "element_id": "e3b14044884f4154b0ba61d2434de184", + "text": "r e p s e i t i l", "metadata": { + "languages": [ + "eng" + ], + "page_number": 5, + "filetype": 
"application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "r e p s e i t i l", - "type": "NarrativeText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "07dbe194a47f5cf9a9ca556284c75a4f", + "text": "10", "metadata": { + "languages": [ + "eng" + ], + "page_number": 5, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "10", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "f4af307ce2cdc30401293219340f0099", + "text": "a t a F", "metadata": { + "languages": [ + "eng" + ], + "page_number": 5, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "a t a F", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "54ac36b92df403df7ffaf63424588d4b", + 
"text": "5", "metadata": { + "languages": [ + "eng" + ], + "page_number": 5, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "5", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "3fd75f6eaddaee6b63b2ce4a774fe967", + "text": "4.6", "metadata": { + "languages": [ + "eng" + ], + "page_number": 5, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "4.6", - "type": "UncategorizedText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "e5967ea57bcfb0730309e1469deda080", + "text": "2.8", "metadata": { + "languages": [ + "eng" + ], + "page_number": 5, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "2.8", - "type": "UncategorizedText" + ] + 
} + } }, { + "type": "UncategorizedText", "element_id": "b44ee5b4593fe0d7608c71176b305adb", + "text": "0", "metadata": { + "languages": [ + "eng" + ], + "page_number": 5, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "0", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "df539d6131bf7d159d73b0d0dc52b047", + "text": "C oal", "metadata": { + "languages": [ + "eng" + ], + "page_number": 5, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "C oal", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "b72baa7737abca757fc689770500137b", + "text": "Oil", "metadata": { + "languages": [ + "eng" + ], + "page_number": 5, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - 
"page_number": 5 - }, - "text": "Oil", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "20206e826347544edfd5e6533ab54f1f", + "text": "Bio m ass", "metadata": { + "languages": [ + "eng" + ], + "page_number": 5, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "Bio m ass", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "10849b003629604de2c5cc3e8022eb9e", + "text": "N atural gas", "metadata": { + "languages": [ + "eng" + ], + "page_number": 5, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "N atural gas", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "068ab5055324937852f1104bd7e79295", + "text": "0.07", "metadata": { + "languages": [ + "eng" + ], + "page_number": 5, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - 
}, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "0.07", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "22a2dc33737f8a92014f09ed2c92dc1f", + "text": "Wind", "metadata": { + "languages": [ + "eng" + ], + "page_number": 5, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "Wind", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "667426af36c4a61263d4667bbd6bf3cf", + "text": "0.04", "metadata": { + "languages": [ + "eng" + ], + "page_number": 5, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "0.04", - "type": "UncategorizedText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "a7606053bbf8555c8d74bf8294c4bb14", + "text": "H ydropo w er", "metadata": { + "languages": [ + "eng" + ], + "page_number": 5, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "H ydropo w er", - "type": "NarrativeText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "56725953e374c47b37fc81f6e1e7e7e2", + "text": "0.02", "metadata": { + "languages": [ + "eng" + ], + "page_number": 5, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "0.02", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "ed8296145e96a846499a17250d047116", + "text": "S olar", "metadata": { + "languages": [ + "eng" + ], + "page_number": 5, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "S olar", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "ff5c2f0ecf3a004fd75b39436b00fc41", + "text": "0.01", "metadata": { + "languages": [ + "eng" + ], + "page_number": 5, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "0.01", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Title", "element_id": "d7474bea6883be0de77cd5ee766d5350", + "text": "N uclear", "metadata": { + "languages": [ + "eng" + ], + "page_number": 5, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "N uclear", - "type": "Title" + ] + } + } }, { + "type": "NarrativeText", "element_id": "990bafbee868bdc70c810bcd97b122da", + "text": "Figure 3. Comparison of number of fatalities due to electricity generation, including accidents and air pollution3", "metadata": { + "languages": [ + "eng" + ], + "page_number": 5, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "Figure 3. 
Comparison of number of fatalities due to electricity generation, including accidents and air pollution3", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "63cb8c9bbf197f41ccce41445f34bfff", + "text": "Contrary to perceptions, nuclear is an incredibly safe source of energy (see Figure 3 for a comparison). What is also clear is that the continued use of alternative energy sources in preference to nuclear energy \u2013 in particular fossil fuels \u2013 poses a far greater risk to public health by significantly contributing to climate change and air pollution.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 5, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "Contrary to perceptions, nuclear is an incredibly safe source of energy (see Figure 3 for a comparison). What is also clear is that the continued use of alternative energy sources in preference to nuclear energy – in particular fossil fuels – poses a far greater risk to public health by significantly contributing to climate change and air pollution.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "ff2e4ccb339b143fc3e2023da7a16b64", + "text": "2 Including 28 firefighters that were exposed to lethal amounts of radiation during the accident night, and 15 fatal cases of thyroid cancer. 3 Sources drawn upon: Markandya, A., & Wilkinson, P. (2007), Sovacool et al. (2016). 
Data for nuclear accidents modified to reflect the", "metadata": { + "languages": [ + "eng" + ], + "page_number": 5, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "2 Including 28 firefighters that were exposed to lethal amounts of radiation during the accident night, and 15 fatal cases of thyroid cancer. 3 Sources drawn upon: Markandya, A., & Wilkinson, P. (2007), Sovacool et al. (2016). Data for nuclear accidents modified to reflect the", - "type": "NarrativeText" + ] + } + } }, { + "type": "Footer", "element_id": "6bda1b0ac58cb5068289427a5f9e2f03", + "text": "2012 UNSCEAR report and the 2015 US NRC SOARCA study.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 5, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "2012 UNSCEAR report and the 2015 US NRC SOARCA study.", - "type": "Footer" + ] + } + } }, { + "type": "Footer", "element_id": "72dd24b52a6a46e82d563b4f9dc0eac8", + "text": "3", "metadata": { + "languages": [ + "eng" + ], + "page_number": 5, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 5 - }, - "text": "3", - "type": "Footer" + ] + } + } }, { + "type": "Footer", "element_id": "e754a2849dac122e7d2e05447f0da512", + "text": "4", "metadata": { + "languages": [ + "eng" + ], + "page_number": 6, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 6 - }, - "text": "4", - "type": "Footer" + ] + } + } }, { + "type": "Title", "element_id": "21b4c32e6d360d1d70e59dad888e306d", + "text": "The low-dose question", "metadata": { + "languages": [ + "eng" + ], + "page_number": 6, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 6 - }, - "text": "The low-dose question", - "type": "Title" + ] + } + } }, { + "type": "NarrativeText", "element_id": "26e60e901d12cbb5efb851fe945a3f96", + "text": "Since the 1950s, the Linear No-Threshold (LNT) theory has been used to inform 
regulatory decisions, positing that any dose of radiation, regardless of the amount or the duration over which it is received, poses a risk. Assuming that LNT is correct, we should expect to see that people living in areas of the world where background doses are higher (e.g. India, Iran and northern Europe) have a higher incidence of cancer. However, despite people living in areas of the world where radiation doses are naturally higher than those that would be received in parts of the evacuation zones around Chernobyl and Fukushima Daiichi, there is no evidence that these populations exhibit any negative health effects. Living nearby a nuclear power plant on average exposes the local population to 0.00009mSv/year, which according to LNT would increase the risk of developing cancer by 0.00000045%. After Chernobyl, the average dose to those evacuated was 30mSv, which would theoretically increase the risk of cancer at some point in their lifetime by 0.15% (on top of the average baseline lifetime risk of cancer, which is 39.5% in the USviii, 50% in the UKix).", "metadata": { + "languages": [ + "eng" + ], + "page_number": 6, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 6 - }, - "text": "Since the 1950s, the Linear No-Threshold (LNT) theory has been used to inform regulatory decisions, positing that any dose of radiation, regardless of the amount or the duration over which it is received, poses a risk. Assuming that LNT is correct, we should expect to see that people living in areas of the world where background doses are higher (e.g. 
India, Iran and northern Europe) have a higher incidence of cancer. However, despite people living in areas of the world where radiation doses are naturally higher than those that would be received in parts of the evacuation zones around Chernobyl and Fukushima Daiichi, there is no evidence that these populations exhibit any negative health effects. Living nearby a nuclear power plant on average exposes the local population to 0.00009mSv/year, which according to LNT would increase the risk of developing cancer by 0.00000045%. After Chernobyl, the average dose to those evacuated was 30mSv, which would theoretically increase the risk of cancer at some point in their lifetime by 0.15% (on top of the average baseline lifetime risk of cancer, which is 39.5% in the USviii, 50% in the UKix).", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "31d07d8c2dce96dc1c6daa38f8597ab5", + "text": "Since the 1980s, there has been considerable scientific debate as to whether the LNT theory is valid, following scientific breakthroughs within, for example, radiobiology and medicine. Indeed, the Chernobyl accident helped illuminate some of the issues associated with LNT. Multiplication of the low doses after the accident (many far too low to be of any health concern) with large populations \u2013 using the assumptions made by LNT \u2013 led to a large number of predicted cancer deaths, which have not, and likely will not materialize. 
This practice has been heavily criticized for being inappropriate in making risk assessments by UNSCEAR, the International Commission on Radiation Protection and a large number of independent scientists.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 6, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 6 - }, - "text": "Since the 1980s, there has been considerable scientific debate as to whether the LNT theory is valid, following scientific breakthroughs within, for example, radiobiology and medicine. Indeed, the Chernobyl accident helped illuminate some of the issues associated with LNT. Multiplication of the low doses after the accident (many far too low to be of any health concern) with large populations – using the assumptions made by LNT – led to a large number of predicted cancer deaths, which have not, and likely will not materialize. This practice has been heavily criticized for being inappropriate in making risk assessments by UNSCEAR, the International Commission on Radiation Protection and a large number of independent scientists.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "4fb06aef292d07a36339c830eb23c8b5", + "text": "Determining the precise risk (or lack thereof) of the extremely small radiation doses associated with the routine operations of nuclear power plants, the disposal of nuclear waste or even extremely rare nuclear accidents is a purely academic exercise, that tries to determine whether the risk is extremely low, too small to detect, or non- existent. 
The risks of low-level radiation pale in comparison to other societal risks such as obesity, smoking, and air pollution.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 6, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 6 - }, - "text": "Determining the precise risk (or lack thereof) of the extremely small radiation doses associated with the routine operations of nuclear power plants, the disposal of nuclear waste or even extremely rare nuclear accidents is a purely academic exercise, that tries to determine whether the risk is extremely low, too small to detect, or non- existent. The risks of low-level radiation pale in comparison to other societal risks such as obesity, smoking, and air pollution.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "1d9fdadf74d73e63be2e683b0a73d86d", + "text": "By looking at radiation risks in isolation, we prolong the over-regulation of radiation in nuclear plants, driving up costs, whilst not delivering any additional health benefits, in turn incentivising the use of more harmful energy sources. 
A recalibration is required, and this can only done by ensuring a holistic approach to risk is taken.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 6, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 6 - }, - "text": "By looking at radiation risks in isolation, we prolong the over-regulation of radiation in nuclear plants, driving up costs, whilst not delivering any additional health benefits, in turn incentivising the use of more harmful energy sources. A recalibration is required, and this can only done by ensuring a holistic approach to risk is taken.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "d3b1c14b00a459af4d32459678203328", + "text": "Adopting an all-hazards approach", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "Adopting an all-hazards approach", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "4ab8051907907017f78ae83e1b95dc07", + "text": "Contemporary debates around nuclear energy often reflect the precautionary principle, a problematic concept applied across 
a range of regulatory and policy issues. A \u2018strong\u2019 interpretation of the precautionary principle, or a \u2018as low as possible\u2019 approach to risk, dictates that regulation is required whenever there is a potential adverse health risk, even if the evidence is not certain and regardless of the cost of regulation.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "Contemporary debates around nuclear energy often reflect the precautionary principle, a problematic concept applied across a range of regulatory and policy issues. 
A ‘strong’ interpretation of the precautionary principle, or a ‘as low as possible’ approach to risk, dictates that regulation is required whenever there is a potential adverse health risk, even if the evidence is not certain and regardless of the cost of regulation.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "bac4e8745150c829821f40387f9107c6", + "text": "The overall regulatory philosophy, at least theoretically, used in the nuclear industry is the ALARA (As Low As Reasonably Achievable) principle, where any regulatory action on radiation should account for socio- economic benefits and costs, as opposed to making decisions based on radiation risks alone.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "The overall regulatory philosophy, at least theoretically, used in the nuclear industry is the ALARA (As Low As Reasonably Achievable) principle, where any regulatory action on radiation should account for socio- economic benefits and costs, as opposed to making decisions based on radiation risks alone.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "4ea8f70af2d39cae9f63fce0ea7165f1", + "text": "However, the regulatory process and the policy debate around nuclear more broadly has long departed from the ALARA principle, no longer weighing cost versus benefits, or considering the overall advantages of nuclear energy, but rather looking at radiation in isolation. 
This has resulted in a subtle shift towards an \u2018as low as possible\u2019 mentality. Attempting to reduce radiation far below de facto safe levels has resulted in an escalation of costs and loss of public confidence, and in some cases has deprived communities of the many benefits nuclear energy provides. In practical terms, this has led to the continued use of more harmful energy sources, such as fossil fuels.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "However, the regulatory process and the policy debate around nuclear more broadly has long departed from the ALARA principle, no longer weighing cost versus benefits, or considering the overall advantages of nuclear energy, but rather looking at radiation in isolation. This has resulted in a subtle shift towards an ‘as low as possible’ mentality. Attempting to reduce radiation far below de facto safe levels has resulted in an escalation of costs and loss of public confidence, and in some cases has deprived communities of the many benefits nuclear energy provides. 
In practical terms, this has led to the continued use of more harmful energy sources, such as fossil fuels.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "17f00667e4e913d53d1deec4900e13f2", + "text": "If the potential of nuclear energy is to be fully realized, public health and safety approaches must be recalibrated to consider a wider range of factors when considering radiation, adopting an \u201call-hazards\u201d approach. Such an approach must ensure that risks are placed within a proper perspective and context, rather than looking at them in isolation. We therefore must not look at the costs \u2013 be they economic, environmental, or public health \u2013 associated with an individual power plant in isolation, but rather the costs associated with it (and its alternatives) at a societal level (Figure 4). This would entail looking at the potential risks arising from the use of nuclear power and comparing these with the risks associated with not adopting nuclear power.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "If the potential of nuclear energy is to be fully realized, public health and safety approaches must be recalibrated to consider a wider range of factors when considering radiation, adopting an “all-hazards” approach. Such an approach must ensure that risks are placed within a proper perspective and context, rather than looking at them in isolation. 
We therefore must not look at the costs – be they economic, environmental, or public health – associated with an individual power plant in isolation, but rather the costs associated with it (and its alternatives) at a societal level (Figure 4). This would entail looking at the potential risks arising from the use of nuclear power and comparing these with the risks associated with not adopting nuclear power.", - "type": "NarrativeText" + ] + } + } }, { + "type": "Title", "element_id": "5691dbf9952eaf7e9d3ebdce6f25dcb7", + "text": "Plant-level production costs at market prices", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "Plant-level production costs at market prices", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "0509a9f00f6b2d7426ff4fba700edfca", + "text": "Grid-level costs of the electricity system", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "Grid-level costs of the electricity system", - "type": "Title" + ] + } + } }, { + "type": "UncategorizedText", 
"element_id": "3af03e497a2d2eba623c6201f4f2f3e0", + "text": "Social and environmental costs of emissions, land-use, climate change, security of supply, etc.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "Social and environmental costs of emissions, land-use, climate change, security of supply, etc.", - "type": "UncategorizedText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "c8322b50b5d5ca32246806cfa7559f5e", + "text": "Figure 4. The different levels of cost associated with electricity generationx", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "Figure 4. The different levels of cost associated with electricity generationx", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "6b8a38a47e6748e80ef621276ed84394", + "text": "A more holistic regulatory process would be required, in which regulators move away from being siloed, looking at specific risks in isolation, with little regard for the greater picture. 
The move towards an all-hazard, holistic approach would require greater coordination between regulators, ensuring that the combined risks of a specific nuclear project are weighed against the risks posed by not advancing said project.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "A more holistic regulatory process would be required, in which regulators move away from being siloed, looking at specific risks in isolation, with little regard for the greater picture. The move towards an all-hazard, holistic approach would require greater coordination between regulators, ensuring that the combined risks of a specific nuclear project are weighed against the risks posed by not advancing said project.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "f52617d9bcc16a116d67ed9f887191ae", + "text": "Equally, the adoption of an all-hazards approach means regulators should consider declaring when a risk is too low to be a public health concern, in line with what the U.S. Nuclear Regulatory Commission attempted to do with its Below Regulatory Concern policy statements in the 1980s and early 1990s. In the context of nuclear power, this means departing from the notion that LNT instils of no safe level of radiation, and adopting a regulatory framework which notes the impossibility of eradicating risks. 
Failing to do so will result in excessive regulation that continues to limit the full potential of nuclear power in tackling climate change and sees a continued reliance on objectively more harmful energy sources.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "Equally, the adoption of an all-hazards approach means regulators should consider declaring when a risk is too low to be a public health concern, in line with what the U.S. Nuclear Regulatory Commission attempted to do with its Below Regulatory Concern policy statements in the 1980s and early 1990s. In the context of nuclear power, this means departing from the notion that LNT instils of no safe level of radiation, and adopting a regulatory framework which notes the impossibility of eradicating risks. 
Failing to do so will result in excessive regulation that continues to limit the full potential of nuclear power in tackling climate change and sees a continued reliance on objectively more harmful energy sources.", - "type": "NarrativeText" + ] + } + } }, { + "type": "Footer", "element_id": "6d3257c982171f0697988a6352e4c113", + "text": "5", "metadata": { + "languages": [ + "eng" + ], + "page_number": 7, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 7 - }, - "text": "5", - "type": "Footer" + ] + } + } }, { + "type": "Footer", "element_id": "eeae015ec524f1b1bb0f7ac376a2090b", + "text": "6", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "6", - "type": "Footer" + ] + } + } }, { + "type": "NarrativeText", "element_id": "b9eaa26361e53e6e430494de5febf1b1", + "text": "Recalibrating the risk conversation", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, 
"permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "Recalibrating the risk conversation", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "d4b3fedebdff5b1fbae81fce93b964b2", + "text": "By looking at radiation risks in isolation, we have created something akin to a \u201cradiation phobia\u201d, that both directly and indirectly harms people around the world. For instance, it is well established that the vast majority of health impacts from Chernobyl and Fukushima Daiichi were not radiological, but rather psychosocial. There has been an observable and dramatic increase in depression, PTSD, substance abuse, and suicides following these events, which can be significantly attributed to the dissonance between the actual and perceived risks of radiation, and the stigmatization they caused.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "By looking at radiation risks in isolation, we have created something akin to a “radiation phobia”, that both directly and indirectly harms people around the world. For instance, it is well established that the vast majority of health impacts from Chernobyl and Fukushima Daiichi were not radiological, but rather psychosocial. 
There has been an observable and dramatic increase in depression, PTSD, substance abuse, and suicides following these events, which can be significantly attributed to the dissonance between the actual and perceived risks of radiation, and the stigmatization they caused.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "f71d9a617fd81ca5b4e4fd8070b56548", + "text": "Similarly, many of the tremendous challenges the global community faces are significantly driven by this \u201cradiation phobia\u201d. Indeed, several of these issues have been considerably exacerbated by the fact that certain risks are given a disproportionate amount of focus, whereas others are de facto ignored. The global conversation around climate change is a prime example of this. The historical use of fossil fuels has contributed significantly to climate change through greenhouse gas emissions, causing unprecedented changes in the liveability of the Earth. By 2025, half of the world\u2019s population will be living in water-stressed areas, as extreme heat and droughts are exacerbating water resources. Between 2030 and 2050, climate change is expected to be the cause of an additional 250,000 deaths per year, arising from malnutrition, malaria, diarrhoea and heat stressx. Yet, despite the huge risks associated with climate change, our addiction to coal, oil, and fossil gas remains, with fossil fuels providing 84% of global primary energy in 2019xii. 
The continued prioritization of fossil fuels at the expense of nuclear energy results in a considerable increase in the risks posed by climate change.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "Similarly, many of the tremendous challenges the global community faces are significantly driven by this “radiation phobia”. Indeed, several of these issues have been considerably exacerbated by the fact that certain risks are given a disproportionate amount of focus, whereas others are de facto ignored. The global conversation around climate change is a prime example of this. The historical use of fossil fuels has contributed significantly to climate change through greenhouse gas emissions, causing unprecedented changes in the liveability of the Earth. By 2025, half of the world’s population will be living in water-stressed areas, as extreme heat and droughts are exacerbating water resources. Between 2030 and 2050, climate change is expected to be the cause of an additional 250,000 deaths per year, arising from malnutrition, malaria, diarrhoea and heat stressx. Yet, despite the huge risks associated with climate change, our addiction to coal, oil, and fossil gas remains, with fossil fuels providing 84% of global primary energy in 2019xii. 
The continued prioritization of fossil fuels at the expense of nuclear energy results in a considerable increase in the risks posed by climate change.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "0265a7bc9b3d48001a5f4f1ebf577ee7", + "text": "Equally, it is well established that living without access to electricity results in illness and death around the world, caused by everything from not having access to modern healthcare to household air pollution. As of today, 770 million people around the world do not have access to electricity, with over 75% of that population living in Sub-Saharan Africa. The world's poorest 4 billion people consume a mere 5% of the energy used in developed economies, and we need to find ways of delivering reliable electricity to the entire human population in a fashion that is sustainable. Household and ambient air pollution causes 8.7 million deaths each year, largely because of the continued use of fossil fuels. Widespread electrification is a key tool for delivering a just energy transition. Investment in nuclear, has become an urgent necessity. 
Discarding it, based on risk perceptions divorced from science, would be to abandon the moral obligation to ensure affordable, reliable, and sustainable energy for every community around the world.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 8, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 8 - }, - "text": "Equally, it is well established that living without access to electricity results in illness and death around the world, caused by everything from not having access to modern healthcare to household air pollution. As of today, 770 million people around the world do not have access to electricity, with over 75% of that population living in Sub-Saharan Africa. The world's poorest 4 billion people consume a mere 5% of the energy used in developed economies, and we need to find ways of delivering reliable electricity to the entire human population in a fashion that is sustainable. Household and ambient air pollution causes 8.7 million deaths each year, largely because of the continued use of fossil fuels. Widespread electrification is a key tool for delivering a just energy transition. Investment in nuclear, has become an urgent necessity. 
Discarding it, based on risk perceptions divorced from science, would be to abandon the moral obligation to ensure affordable, reliable, and sustainable energy for every community around the world.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "26871df15ce0d1fa304875b2c5d2ed6b", + "text": "Clearly, we have reached a point where we must establish a new conversation about the relative risks of using nuclear, especially when risks created by other energy sources are considered. We cannot address many of the global challenges we face without a significant increase in the use of nuclear energy. The detrimental effects of decades of looking at nuclear risks in isolation highlights just how crucial it is that regulators and policymakers change the way they view nuclear energy, and transition towards an all-hazards approach, ensuring that actions taken to mitigate risks do not result in creating more severe risks.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 9, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 9 - }, - "text": "Clearly, we have reached a point where we must establish a new conversation about the relative risks of using nuclear, especially when risks created by other energy sources are considered. We cannot address many of the global challenges we face without a significant increase in the use of nuclear energy. 
The detrimental effects of decades of looking at nuclear risks in isolation highlights just how crucial it is that regulators and policymakers change the way they view nuclear energy, and transition towards an all-hazards approach, ensuring that actions taken to mitigate risks do not result in creating more severe risks.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "fa4fe263aabc7d2c80984a2dadd23573", + "text": "We must begin to holistically look at the severity of the consequences of maintaining the current energy production system, many of which are irreversible. The ways in which we address climate change and other issues of global importance must be sustainable and not create new hazards down the line. The reality is that nuclear has always been and remains an exceptionally safe source of energy, representing the lowest risk, the most sustainable, and the most affordable ways to generate around-the-clock electricity.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 9, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 9 - }, - "text": "We must begin to holistically look at the severity of the consequences of maintaining the current energy production system, many of which are irreversible. The ways in which we address climate change and other issues of global importance must be sustainable and not create new hazards down the line. 
The reality is that nuclear has always been and remains an exceptionally safe source of energy, representing the lowest risk, the most sustainable, and the most affordable ways to generate around-the-clock electricity.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "5bab160f6ce557c074d08735fcd01beb", + "text": "Therefore, World Nuclear Association calls upon policymakers and regulators to adopt an all-hazards approach, where different risks associated with energy producing technologies are placed in perspective and the appropriate context, and examined in line with the latest scientific evidence. Policymakers and regulators must ensure that their decisions regarding radiation protection do not create greater risks elsewhere. This include the recalibration of existing regulations regarding nuclear power and radiation, weighing the cost of regulatory measures against the societal benefits provided by nuclear energy.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 9, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 9 - }, - "text": "Therefore, World Nuclear Association calls upon policymakers and regulators to adopt an all-hazards approach, where different risks associated with energy producing technologies are placed in perspective and the appropriate context, and examined in line with the latest scientific evidence. Policymakers and regulators must ensure that their decisions regarding radiation protection do not create greater risks elsewhere. 
This include the recalibration of existing regulations regarding nuclear power and radiation, weighing the cost of regulatory measures against the societal benefits provided by nuclear energy.", - "type": "NarrativeText" + ] + } + } }, { + "type": "Footer", "element_id": "3c566eec62c62659fc0d9d6f59a2d602", + "text": "7", "metadata": { + "languages": [ + "eng" + ], + "page_number": 9, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 9 - }, - "text": "7", - "type": "Footer" + ] + } + } }, { + "type": "Footer", "element_id": "254ec884050f824b4524f53a2693f685", + "text": "8", "metadata": { + "languages": [ + "eng" + ], + "page_number": 10, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 10 - }, - "text": "8", - "type": "Footer" + ] + } + } }, { + "type": "Title", "element_id": "3d819f053bf67ec228cf8c23aca02ac7", + "text": "References", "metadata": { + "languages": [ + "eng" + ], + "page_number": 10, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 10 - }, - "text": "References", - "type": "Title" + ] + } + } }, { + "type": "Title", "element_id": "b8894ad8644fb6f13e827c34d656547b", + "text": "i", "metadata": { + "languages": [ + "eng" + ], + "page_number": 10, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 10 - }, - "text": "i", - "type": "Title" + ] + } + } }, { + "type": "NarrativeText", "element_id": "f75a66f1ae3524b41d0f7c212c01c2f9", + "text": "World Health Organization (2020). Road traffic injuries. Available at: https://www.who.int/news-room/fact-sheets/ detail/road-traffic-injuries", "metadata": { - "data_source": { - "permissions_data": [ - { - "mode": 33188 - } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 10 - }, - "text": "World Health Organization (2020). Road traffic injuries. 
Available at: https://www.who.int/news-room/fact-sheets/ detail/road-traffic-injuries", - "type": "NarrativeText" + "page_number": 10, + "filetype": "application/pdf", + "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, + "permissions_data": [ + { + "mode": 33188 + } + ] + } + } }, { + "type": "Title", "element_id": "72f0132fa7de410e48a4788048dc5f85", + "text": "ii", "metadata": { + "languages": [ + "eng" + ], + "page_number": 10, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 10 - }, - "text": "ii", - "type": "Title" + ] + } + } }, { + "type": "NarrativeText", "element_id": "a303e7831d9ee1669929c5268d18700e", + "text": "BBC (2020). Plane crash fatalities fell more than 50% in 2019. Available at: https://www.bbc.co.uk/news/ business-50953712", "metadata": { + "languages": [ + "eng" + ], + "page_number": 10, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 10 - }, - "text": "BBC (2020). Plane crash fatalities fell more than 50% in 2019. 
Available at: https://www.bbc.co.uk/news/ business-50953712", - "type": "NarrativeText" + ] + } + } }, { + "type": "Title", "element_id": "140aa813a7abab7610055f7f54431142", + "text": "iii", "metadata": { + "languages": [ + "eng" + ], + "page_number": 10, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 10 - }, - "text": "iii", - "type": "Title" + ] + } + } }, { + "type": "NarrativeText", "element_id": "a2de59991693d2a3e0129b13b3a65651", + "text": "Slovic, P., 2010. The Psychology of risk. Sa\u00fade e Sociedade, 19(4), pp. 731-747.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 10, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 10 - }, - "text": "Slovic, P., 2010. The Psychology of risk. Saúde e Sociedade, 19(4), pp. 731-747.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "fadb9d48b65db285466766306099afcc", + "text": "iv United Nations Scientific Committee on the Effects of Radiation (2016). 
Report of the United Nations Scientific", "metadata": { + "languages": [ + "eng" + ], + "page_number": 10, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 10 - }, - "text": "iv United Nations Scientific Committee on the Effects of Radiation (2016). Report of the United Nations Scientific", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "7eabef7be1249e585577b7545d1d4c9c", + "text": "Committee on the Effects of Atomic Radiation. Accessed from: https://www.unscear.org/docs/publications/2016/ UNSCEAR_2016_GA-Report-CORR.pdf", "metadata": { + "languages": [ + "eng" + ], + "page_number": 10, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 10 - }, - "text": "Committee on the Effects of Atomic Radiation. 
Accessed from: https://www.unscear.org/docs/publications/2016/ UNSCEAR_2016_GA-Report-CORR.pdf", - "type": "NarrativeText" + ] + } + } }, { + "type": "Title", "element_id": "d2a4de70fd2993d92af34ff94b0c1271", + "text": "v", "metadata": { + "languages": [ + "eng" + ], + "page_number": 10, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 10 - }, - "text": "v", - "type": "Title" + ] + } + } }, { + "type": "NarrativeText", "element_id": "723c5e1217a11ce3f7ec41b789d117dd", + "text": "International Energy Agency (2020). Global share of total energy supply by source, 2018. Key World Energy Statistics 2020. Available at: https://www.iea.org/data-and-statistics/charts/global-share-of-total-energy-supply-by- source-2018", "metadata": { + "languages": [ + "eng" + ], + "page_number": 10, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 10 - }, - "text": "International Energy Agency (2020). Global share of total energy supply by source, 2018. Key World Energy Statistics 2020. 
Available at: https://www.iea.org/data-and-statistics/charts/global-share-of-total-energy-supply-by- source-2018", - "type": "NarrativeText" + ] + } + } }, { + "type": "Title", "element_id": "0e57c18ea13ba077025a4de4b761f649", + "text": "vi", "metadata": { + "languages": [ + "eng" + ], + "page_number": 10, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 10 - }, - "text": "vi", - "type": "Title" + ] + } + } }, { + "type": "NarrativeText", "element_id": "0cacfc94697ee7757e6b50e4ce8270e1", + "text": "Vohra, K., Vodonos, A., Schwartz, J., Marais, E., Sulprizio, M., & Mickley, L. (2021). Global mortality from outdoor fine particle pollution generated by fossil fuel combustion: Results from GEOS-Chem. Environmental Research, 195, p. 1-8", "metadata": { + "languages": [ + "eng" + ], + "page_number": 10, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 10 - }, - "text": "Vohra, K., Vodonos, A., Schwartz, J., Marais, E., Sulprizio, M., & Mickley, L. (2021). Global mortality from outdoor fine particle pollution generated by fossil fuel combustion: Results from GEOS-Chem. Environmental Research, 195, p. 
1-8", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "9e8616fd7448d2c2795e4934bfefcb49", + "text": "vii World Health Organization. (2016). Updated tables 2016 for \u2018Preventing disease through health environments: a", "metadata": { + "languages": [ + "eng" + ], + "page_number": 10, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 10 - }, - "text": "vii World Health Organization. (2016). Updated tables 2016 for ‘Preventing disease through health environments: a", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "4fba907b5f7ef041b876e554ceb3b579", + "text": "global assessment of the burden of disease from environmental risks\u2019. Available at: https://www.who.int/data/gho/ data/themes/public-health-and-environment [Accessed on 8 April 2021]", "metadata": { + "languages": [ + "eng" + ], + "page_number": 10, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 10 - }, - "text": "global assessment of the burden of disease from environmental risks’. 
Available at: https://www.who.int/data/gho/ data/themes/public-health-and-environment [Accessed on 8 April 2021]", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "b330cfba7f878bcd784be51561addf7d", + "text": "viii National Cancer Institute (2020). Cancer statistics. Available at: https://www.cancer.gov/about-cancer/", "metadata": { + "languages": [ + "eng" + ], + "page_number": 10, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 10 - }, - "text": "viii National Cancer Institute (2020). Cancer statistics. Available at: https://www.cancer.gov/about-cancer/", - "type": "NarrativeText" + ] + } + } }, { + "type": "Title", "element_id": "9ff7e3903c425afa90433ef0907d2406", + "text": "understanding/statistics", "metadata": { + "languages": [ + "eng" + ], + "page_number": 10, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 10 - }, - "text": "understanding/statistics", - "type": "Title" + ] + } + } }, { + "type": "NarrativeText", "element_id": "dfc1eab611b2b3ccceaa362e1f92b461", + "text": "ix Cancer Research UK (n.d.). Cancer risk statistics. 
Available at: https://www.cancerresearchuk.org/health-", "metadata": { + "languages": [ + "eng" + ], + "page_number": 10, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 10 - }, - "text": "ix Cancer Research UK (n.d.). Cancer risk statistics. Available at: https://www.cancerresearchuk.org/health-", - "type": "NarrativeText" + ] + } + } }, { + "type": "Title", "element_id": "92e514acfb3d6dc4b6902970a29d0ee5", + "text": "professional/cancer-statistics/risk", "metadata": { + "languages": [ + "eng" + ], + "page_number": 10, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 10 - }, - "text": "professional/cancer-statistics/risk", - "type": "Title" + ] + } + } }, { + "type": "NarrativeText", "element_id": "4473f48b4c63cefe31eb431543e88ec4", + "text": "x OECD-NEA (2019). The Full Costs of Electricity Provision. 
Available at: https://www.oecd-nea.org/jcms/pl_14998/", "metadata": { + "languages": [ + "eng" + ], + "page_number": 10, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 10 - }, - "text": "x OECD-NEA (2019). The Full Costs of Electricity Provision. Available at: https://www.oecd-nea.org/jcms/pl_14998/", - "type": "NarrativeText" + ] + } + } }, { + "type": "Title", "element_id": "b3ef0635f2f90c046da19f982807d0d8", + "text": "the-full-costs-of-electricity-provision?details=true", "metadata": { + "languages": [ + "eng" + ], + "page_number": 10, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 10 - }, - "text": "the-full-costs-of-electricity-provision?details=true", - "type": "Title" + ] + } + } }, { + "type": "NarrativeText", "element_id": "9ebd87394e985fb62492fe128e51a9de", + "text": "xi World Health Organization (2018). Climate change and health. 
Available at: https://www.who.int/news-room/fact-", "metadata": { + "languages": [ + "eng" + ], + "page_number": 10, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 10 - }, - "text": "xi World Health Organization (2018). Climate change and health. Available at: https://www.who.int/news-room/fact-", - "type": "NarrativeText" + ] + } + } }, { + "type": "Title", "element_id": "35e0e5c249e793b79936b7b9f5f98133", + "text": "sheets/detail/climate-change-and-health", "metadata": { + "languages": [ + "eng" + ], + "page_number": 10, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 10 - }, - "text": "sheets/detail/climate-change-and-health", - "type": "Title" + ] + } + } }, { + "type": "NarrativeText", "element_id": "dc6d4d19a7b9f7e96c892f80540189f1", + "text": "xii BP, 2020. 
BP Statistical Review of World Energy, London: BP.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 10, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 10 - }, - "text": "xii BP, 2020. BP Statistical Review of World Energy, London: BP.", - "type": "NarrativeText" + ] + } + } }, { + "type": "NarrativeText", "element_id": "1e7c1c53824a72f2b36a527d21d7ac3d", + "text": "Photo credits: Front cover & pages 1, 4, 6 left, 7 bottom: Adobe Stock; page 6 right: Getty Images; page 7 top: Uniper.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 10, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 10 - }, - "text": "Photo credits: Front cover & pages 1, 4, 6 left, 7 bottom: Adobe Stock; page 6 right: Getty Images; page 7 top: Uniper.", - "type": "NarrativeText" + ] + } + } }, { + "type": "UncategorizedText", "element_id": "48dae8ac77cc653df8fe46ca706c1931", + "text": "World Nuclear Association Tower House 10 Southampton Street London WC2E 7HA United Kingdom", "metadata": { + "languages": [ + "eng" + ], + "page_number": 12, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": 
"/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 12 - }, - "text": "World Nuclear Association Tower House 10 Southampton Street London WC2E 7HA United Kingdom", - "type": "UncategorizedText" + ] + } + } }, { + "type": "Header", "element_id": "7ac9948c40759d5a7c1a2dbe870ca322", + "text": "+44 (0)20 7451 1520 www.world-nuclear.org info@world-nuclear.org", "metadata": { + "languages": [ + "eng" + ], + "page_number": 12, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 12 - }, - "text": "+44 (0)20 7451 1520 www.world-nuclear.org info@world-nuclear.org", - "type": "Header" + ] + } + } }, { + "type": "NarrativeText", "element_id": "f133df66727c328c27d3b8184204f864", + "text": "World Nuclear Association is the international organization that represents the global nuclear industry. 
Its mission is to promote a wider understanding of nuclear energy among key international influencers by producing authoritative information, developing common industry positions, and contributing to the energy debate.", "metadata": { + "languages": [ + "eng" + ], + "page_number": 12, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 12 - }, - "text": "World Nuclear Association is the international organization that represents the global nuclear industry. Its mission is to promote a wider understanding of nuclear energy among key international influencers by producing authoritative information, developing common industry positions, and contributing to the energy debate.", - "type": "NarrativeText" + ] + } + } }, { + "type": "Footer", "element_id": "a637062adfd950e2491a0929d2b4ac1a", + "text": "Recalibrating risk \u00a9 2021 World Nuclear Association. Registered in England and Wales, company number 01215741", "metadata": { + "languages": [ + "eng" + ], + "page_number": 12, + "filetype": "application/pdf", "data_source": { + "record_locator": { + "path": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/recalibrating-risk-report.pdf" + }, "permissions_data": [ { "mode": 33188 } - ], - "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/s3/small-pdf-set/recalibrating-risk-report.pdf" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 12 - }, - "text": "Recalibrating risk © 2021 World Nuclear Association. 
Registered in England and Wales, company number 01215741", - "type": "Footer" + ] + } + } } ] \ No newline at end of file diff --git a/test_unstructured_ingest/expected-structured-output/s3-minio/wiki_movie_plots_small.csv.json b/test_unstructured_ingest/expected-structured-output/s3-minio/wiki_movie_plots_small.csv.json deleted file mode 100644 index 27857cec53..0000000000 --- a/test_unstructured_ingest/expected-structured-output/s3-minio/wiki_movie_plots_small.csv.json +++ /dev/null @@ -1,22 +0,0 @@ -[ - { - "element_id": "249f1ecef0ea42c3b2b663b3e409879c", - "metadata": { - "data_source": { - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/wiki_movie_plots_small.csv" - }, - "url": "s3://utic-dev-tech-fixtures/wiki_movie_plots_small.csv", - "version": "103589111396252091980300895568390462924" - }, - "filetype": "text/csv", - "languages": [ - "eng" - ], - "text_as_html": "\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Release YearTitleOrigin/EthnicityDirectorCastGenreWiki PagePlot
1901Kansas Saloon SmashersAmericanUnknownunknownhttps://en.wikipedia.org/wiki/Kansas_Saloon_SmashersA bartender is working at a saloon, serving drinks to customers. After he fills a stereotypically Irish man's bucket with beer, Carrie Nation and her followers burst inside. They assault the Irish man, pulling his hat over his eyes and then dumping the beer over his head. The group then begin wrecking the bar, smashing the fixtures, mirrors, and breaking the cash register. The bartender then sprays seltzer water in Nation's face before a group of policemen appear and order everybody to leave.[1]
1901Love by the Light of the MoonAmericanUnknownunknownhttps://en.wikipedia.org/wiki/Love_by_the_Light_of_the_MoonThe moon, painted with a smiling face hangs over a park at night. A young couple walking past a fence learn on a railing and look up. The moon smiles. They embrace, and the moon's smile gets bigger. They then sit down on a bench by a tree. The moon's view is blocked, causing him to frown. In the last scene, the man fans the woman with his hat because the moon has left the sky and is perched over her shoulder to see everything better.
1901The Martyred PresidentsAmericanUnknownunknownhttps://en.wikipedia.org/wiki/The_Martyred_PresidentsThe film, just over a minute long, is composed of two shots. In the first, a girl sits at the base of an altar or tomb, her face hidden from the camera. At the center of the altar, a viewing portal displays the portraits of three U.S. Presidents—Abraham Lincoln, James A. Garfield, and William McKinley—each victims of assassination.\\r\\nIn the second shot, which runs just over eight seconds long, an assassin kneels feet of Lady Justice.
1901Terrible Teddy, the Grizzly KingAmericanUnknownunknownhttps://en.wikipedia.org/wiki/Terrible_Teddy,_the_Grizzly_KingLasting just 61 seconds and consisting of two shots, the first shot is set in a wood during winter. The actor representing then vice-president Theodore Roosevelt enthusiastically hurries down a hillside towards a tree in the foreground. He falls once, but rights himself and cocks his rifle. Two other men, bearing signs reading \"His Photographer\" and \"His Press Agent\" respectively, follow him into the shot; the photographer sets up his camera. \"Teddy\" aims his rifle upward at the tree and fells what appears to be a common house cat, which he then proceeds to stab. \"Teddy\" holds his prize aloft, and the press agent takes notes. The second shot is taken in a slightly different part of the wood, on a path. \"Teddy\" rides the path on his horse towards the camera and out to the left of the shot, followed closely by the press agent and photographer, still dutifully holding their signs.
1902Jack and the BeanstalkAmericanGeorge S. Fleming, Edwin S. Porterunknownhttps://en.wikipedia.org/wiki/Jack_and_the_Beanstalk_(1902_film)The earliest known adaptation of the classic fairytale, this films shows Jack trading his cow for the beans, his mother forcing him to drop them in the front yard, and beig forced upstairs. As he sleeps, Jack is visited by a fairy who shows him glimpses of what will await him when he ascends the bean stalk. In this version, Jack is the son of a deposed king. When Jack wakes up, he finds the beanstalk has grown and he climbs to the top where he enters the giant's home. The giant finds Jack, who narrowly escapes. The giant chases Jack down the bean stalk, but Jack is able to cut it down before the giant can get to safety. He falls and is killed as Jack celebrates. The fairy then reveals that Jack may return home as a prince.
1903Alice in WonderlandAmericanCecil HepworthMay Clarkunknownhttps://en.wikipedia.org/wiki/Alice_in_Wonderland_(1903_film)Alice follows a large white rabbit down a \"Rabbit-hole\". She finds a tiny door. When she finds a bottle labeled \"Drink me\", she does, and shrinks, but not enough to pass through the door. She then eats something labeled \"Eat me\" and grows larger. She finds a fan when enables her to shrink enough to get into the \"Garden\" and try to get a \"Dog\" to play with her. She enters the \"White Rabbit's tiny House,\" but suddenly resumes her normal size. In order to get out, she has to use the \"magic fan.\"\\r\\nShe enters a kitchen, in which there is a cook and a woman holding a baby. She persuades the woman to give her the child and takes the infant outside after the cook starts throwing things around. The baby then turns into a pig and squirms out of her grip. \"The Duchess's Cheshire Cat\" appears and disappears a couple of times to Alice and directs her to the Mad Hatter's \"Mad Tea-Party.\" After a while, she leaves.\\r\\nThe Queen invites Alice to join the \"ROYAL PROCESSION\": a parade of marching playing cards and others headed by the White Rabbit. When Alice \"unintentionally offends the Queen\", the latter summons the \"Executioner\". Alice \"boxes the ears\", then flees when all the playing cards come for her. Then she wakes up and realizes it was all a dream.
1903The Great Train RobberyAmericanEdwin S. Porterwesternhttps://en.wikipedia.org/wiki/The_Great_Train_Robbery_(1903_film)The film opens with two bandits breaking into a railroad telegraph office, where they force the operator at gunpoint to have a train stopped and to transmit orders for the engineer to fill the locomotive's tender at the station's water tank. They then knock the operator out and tie him up. As the train stops it is boarded by the bandits‍—‌now four. Two bandits enter an express car, kill a messenger and open a box of valuables with dynamite; the others kill the fireman and force the engineer to halt the train and disconnect the locomotive. The bandits then force the passengers off the train and rifle them for their belongings. One passenger tries to escape but is instantly shot down. Carrying their loot, the bandits escape in the locomotive, later stopping in a valley where their horses had been left.\\r\\nMeanwhile, back in the telegraph office, the bound operator awakens, but he collapses again. His daughter arrives bringing him his meal and cuts him free, and restores him to consciousness by dousing him with water.\\r\\nThere is some comic relief at a dance hall, where an Eastern stranger is forced to dance while the locals fire at his feet. The door suddenly opens and the telegraph operator rushes in to tell them of the robbery. The men quickly form a posse, which overtakes the bandits, and in a final shootout kills them all and recovers the stolen mail.
1904The SuburbaniteAmericanWallace McCutcheoncomedyhttps://en.wikipedia.org/wiki/The_SuburbaniteThe film is about a family who move to the suburbs, hoping for a quiet life. Things start to go wrong, and the wife gets violent and starts throwing crockery, leading to her arrest.
1905The Little Train RobberyAmericanEdwin Stanton Porterunknownhttps://en.wikipedia.org/wiki/The_Little_Train_RobberyThe opening scene shows the interior of the robbers' den. The walls are decorated with the portraits of notorious criminals and pictures illustrating the exploits of famous bandits. Some of the gang are lounging about, while others are reading novels and illustrated papers. Although of youthful appearance, each is dressed like a typical Western desperado. The \"Bandit Queen,\" leading a blindfolded new recruit, now enters the room. He is led to the center of the room, raises his right hand and is solemnly sworn in. When the bandage is removed from his eyes he finds himself looking into the muzzles of a dozen or more 45's. The gang then congratulates the new member and heartily shake his hand. The \"Bandit Queen\" who is evidently the leader of the gang, now calls for volunteers to hold up a train. All respond, but she picks out seven for the job who immediately leave the cabin.\\r\\nThe next scene shows the gang breaking into a barn. They steal ponies and ride away. Upon reaching the place agreed upon they picket their ponies and leaving them in charge of a trusted member proceed to a wild mountain spot in a bend of the railroad, where the road runs over a steep embankment. The spot is an ideal one for holding up a train. Cross ties are now placed on the railroad track and the gang hide in some bushes close by and wait for the train. The train soon approaches and is brought to a stop. The engineer leaves his engine and proceeds to remove the obstruction on the track. While he is bending over one of the gang sneaks up behind them and hits him on the head with an axe, and knocks him senseless down the embankment, while the gang surround the train and hold up the passengers. 
After securing all the \"valuables,\" consisting principally of candy and dolls, the robbers uncouple the engine and one car and make their escape just in time to avoid a posse of police who appear on the scene. Further up the road they abandon the engine and car, take to the woods and soon reach their ponies.\\r\\nIn the meantime the police have learned the particulars of the hold-up from the frightened passengers and have started up the railroad tracks after the fleeing robbers. The robbers are next seen riding up the bed of a shallow stream and finally reach their den, where the remainder of the gang have been waiting for them. Believing they have successfully eluded their pursuers, they proceed to divide the \"plunder.\" The police, however, have struck the right trail and are in close pursuit. While the \"plunder\" is being divided a sentry gives the alarm and the entire gang, abandoning everything, rush from the cabin barely in time to escape capture. The police make a hurried search and again start in pursuit. The robbers are so hard pressed that they are unable to reach their ponies, and are obliged to take chances on foot. The police now get in sight of the fleeing robbers and a lively chase follows through tall weeds, over a bridge and up a steep hill. Reaching a pond the police are close on their heels. The foremost robbers jump in clothes and all and strike out for the opposite bank. Two hesitate and are captured. Boats are secured and after an exciting tussle the entire gang is rounded up. In the mix up one of the police is dragged overboard. The final scene shows the entire gang of bedraggled and crestfallen robbers tied together with a rope and being led away by the police. Two of the police are loaded down with revolvers, knives and cartridge belts, and resemble walking aresenals. As a fitting climax a confederate steals out of the woods, cuts the rope and gallantly rescues the \"Bandit Queen.\"
1905The Night Before ChristmasAmericanEdwin Stanton Porterunknownhttps://en.wikipedia.org/wiki/The_Night_Before_Christmas_(1905_film)Scenes are introduced using lines of the poem.[2] Santa Claus, played by Harry Eytinge, is shown feeding real reindeer[4] and finishes his work in the workshop. Meanwhile, the children of a city household hang their stockings and go to bed, but unable to sleep they engage in a pillow fight. Santa Claus leaves his home on a sleigh with his reindeer. He enters the children's house through the chimney, and leaves the presents. The children come down the stairs and enjoy their presents.
1906Dream of a Rarebit FiendAmericanWallace McCutcheon and Edwin S. Portershorthttps://en.wikipedia.org/wiki/Dream_of_a_Rarebit_Fiend_(1906_film)The Rarebit Fiend gorges on Welsh rarebit at a restaurant. When he leaves, he begins to get dizzy as he starts to hallucinate. He desperately tries to hang onto a lamppost as the world spins all around him. A man helps him get home. He falls into bed and begins having more hallucinatory dreams. During a dream sequence, the furniture begins moving around the room. Imps emerge from a floating Welsh rarebit container and begin poking his head as he sleeps. His bed then begins dancing and spinning wildly around the room before flying out the window with the Fiend in it. The bed floats across the city as the Fiend floats up and off the bed. He hangs off the back and eventually gets caught on a weathervane atop a steeple. His bedclothes tear and he falls from the sky, crashing through his bedroom ceiling. The Fiend awakens from the dream after falling out of his bed.
1906From Leadville to Aspen: A Hold-Up in the RockiesAmericanFrancis J. Marion and Wallace McCutcheonshort action/crime westernhttps://en.wikipedia.org/wiki/From_Leadville_to_Aspen:_A_Hold-Up_in_the_RockiesThe film features a train traveling through the Rockies and a hold up created by two thugs placing logs on the line. They systematically rob the wealthy occupants at gunpoint and then make their getaway along the tracks and later by a hi-jacked horse and cart.
1906Kathleen MavourneenAmericanEdwin S. Portershort filmhttps://en.wikipedia.org/wiki/Kathleen_Mavourneen_(1906_film)Irish villager Kathleen is a tenant of Captain Clearfield, who controls local judges and criminals. Her father owes Clearfield a large debt. Terence O'More saves the village from Clearfield, causing a large celebration.\\r\\nFilm historian Charles Musser writes of Porter's adaptation, \"O'More not only rescues Kathleen from the villain but, through marriage, renews the family for another generation.\"[1]
1907Daniel BooneAmericanWallace McCutcheon and Ediwin S. PorterWilliam Craven, Florence Lawrencebiographicalhttps://en.wikipedia.org/wiki/Daniel_Boone_(1907_film)Boone's daughter befriends an Indian maiden as Boone and his companion start out on a hunting expedition. While he is away, Boone's cabin is attacked by the Indians, who set it on fire and abduct Boone's daughter. Boone returns, swears vengeance, then heads out on the trail to the Indian camp. His daughter escapes but is chased. The Indians encounter Boone, which sets off a huge fight on the edge of a cliff. A burning arrow gets shot into the Indian camp. Boone gets tied to the stake and tortured. The burning arrow sets the Indian camp on fire, causing panic. Boone is rescued by his horse, and Boone has a knife fight in which he kills the Indian chief.[2]
1907How Brown Saw the Baseball GameAmericanUnknownUnknowncomedyhttps://en.wikipedia.org/wiki/How_Brown_Saw_the_Baseball_GameBefore heading out to a baseball game at a nearby ballpark, sports fan Mr. Brown drinks several highball cocktails. He arrives at the ballpark to watch the game, but has become so inebriated that the game appears to him in reverse, with the players running the bases backwards and the baseball flying back into the pitcher's hand. After the game is over, Mr. Brown is escorted home by one of his friends. When they arrive at Brown's house, they encounter his wife who becomes furious with the friend and proceeds to physically assault him, believing he is responsible for her husband's severe intoxication.[1]
1907Laughing GasAmericanEdwin Stanton PorterBertha Regustus, Edward Bouldencomedyhttps://en.wikipedia.org/wiki/Laughing_Gas_(film)#1907_FilmThe plot is that of a black woman going to the dentist for a toothache and being given laughing gas. On her way walking home, and in other situations, she can't stop laughing, and everyone she meets \"catches\" the laughter from her, including a vendor and police officers.
1908The Adventures of DollieAmericanD. W. GriffithArthur V. Johnson, Linda Arvidsondramahttps://en.wikipedia.org/wiki/The_Adventures_of_DollieOn a beautiful summer day a father and mother take their daughter Dollie on an outing to the river. The mother refuses to buy a gypsy's wares. The gypsy tries to rob the mother, but the father drives him off. The gypsy returns to the camp and devises a plan. They return and kidnap Dollie while her parents are distracted. A rescue crew is organized, but the gypsy takes Dollie to his camp. They gag Dollie and hide her in a barrel before the rescue party gets to the camp. Once they leave the gypsies and escapes in their wagon. As the wagon crosses the river, the barrel falls into the water. Still sealed in the barrel, Dollie is swept downstream in dangerous currents. A boy who is fishing in the river finds the barrel, and Dollie is reunited safely with her parents.
1908The Black ViperAmericanD. W. GriffithD. W. Griffithdramahttps://en.wikipedia.org/wiki/The_Black_ViperA thug accosts a girl as she leaves her workplace but a man rescues her. The thug vows revenge and, with the help of two friends, attacks the girl and her rescuer again as they're going for a walk. This time they succeed in kidnapping the rescuer. He is bound and gagged and taken away in a cart. The girl runs home and gets help from several neighbors. They track the ruffians down to a cabin in the mountains where the gang has trapped their victim and set the cabin on fire. A thug and Rescuer fight on the roof of the house.
1908A Calamitous ElopementAmericanD.W. GriffithHarry Solter, Linda Arvidsoncomedyhttps://en.wikipedia.org/wiki/A_Calamitous_ElopementA young couple decides to elope after being caught in the midst of a romantic moment by the woman's angry father. They make plans to leave, but a thief discovers their plans and hides in their trunk and waits for the right moment to steal their belongings.
1908The Call of the WildAmericanD. W. GriffithCharles Insleeadventurehttps://en.wikipedia.org/wiki/The_Call_of_the_Wild_(1908_film)A white girl (Florence Lawrence) rejects a proposal from an Indian brave (Charles Inslee) in this early one-reel Western melodrama. Despite the rejection, the Indian still comes to the girl's defense when she is abducted by his warring tribe. In her first year in films, Florence Lawrence was already the most popular among the Biograph Company's anonymous stock company players. By 1909, she was known the world over as \"The Biograph Girl.\"
1908A Christmas CarolAmericanUnknownTom Rickettsdramahttps://en.wikipedia.org/wiki/A_Christmas_Carol_(1908_film)No prints of the first American film adaptation of A Christmas Carol are known to exist,[1] but The Moving Picture World magazine provided a scene-by-scene description before the film's release.[2] Scrooge goes into his office and begins working. His nephew, along with three women who wish for Scrooge to donate enter. However, Scrooge dismisses them. On the night of Christmas Eve, his long-dead partner Jacob Marley comes as a ghost, warning him of a horrible fate if he does not change his ways. Scrooge meets three spirits that show Scrooge the real meaning of Christmas, along with his grave, the result of his parsimonious ways. The next morning, he wakes and realizes the error of his ways. Scrooge was then euphoric and generous for the rest of his life.
1908The Fight for FreedomAmericanD. W. GriffithFlorence Auer, John G. Adolfiwesternhttps://en.wikipedia.org/wiki/The_Fight_for_FreedomThe film opens in a town on the Mexican border. A poker game is going on in the local saloon. One of the players cheats and is shot dead by another of the players, a Mexican named Pedro. In the uproar that follows Pedro is wounded as he escapes from the saloon. The sheriff is called, who tracks Pedro to his home but Pedro kills the sherriff too. While Pedro hides, his wife Juanita, is arrested on suspicion of murdering the sheriff. Pedro rescues her from the town jail and the two head for the Mexican border. Caught by the posse before they reach the border, Juanita is killed and the film ends with Pedro being arrested and taken back to town.
" - }, - "text": "\n\n\nRelease Year\nTitle\nOrigin/Ethnicity\nDirector\nCast\nGenre\nWiki Page\nPlot\n\n\n1901\nKansas Saloon Smashers\nAmerican\nUnknown\n\nunknown\nhttps://en.wikipedia.org/wiki/Kansas_Saloon_Smashers\nA bartender is working at a saloon, serving drinks to customers. After he fills a stereotypically Irish man's bucket with beer, Carrie Nation and her followers burst inside. They assault the Irish man, pulling his hat over his eyes and then dumping the beer over his head. The group then begin wrecking the bar, smashing the fixtures, mirrors, and breaking the cash register. The bartender then sprays seltzer water in Nation's face before a group of policemen appear and order everybody to leave.[1]\n\n\n1901\nLove by the Light of the Moon\nAmerican\nUnknown\n\nunknown\nhttps://en.wikipedia.org/wiki/Love_by_the_Light_of_the_Moon\nThe moon, painted with a smiling face hangs over a park at night. A young couple walking past a fence learn on a railing and look up. The moon smiles. They embrace, and the moon's smile gets bigger. They then sit down on a bench by a tree. The moon's view is blocked, causing him to frown. In the last scene, the man fans the woman with his hat because the moon has left the sky and is perched over her shoulder to see everything better.\n\n\n1901\nThe Martyred Presidents\nAmerican\nUnknown\n\nunknown\nhttps://en.wikipedia.org/wiki/The_Martyred_Presidents\nThe film, just over a minute long, is composed of two shots. In the first, a girl sits at the base of an altar or tomb, her face hidden from the camera. At the center of the altar, a viewing portal displays the portraits of three U.S. Presidents—Abraham Lincoln, James A. 
Garfield, and William McKinley—each victims of assassination.\\r\\nIn the second shot, which runs just over eight seconds long, an assassin kneels feet of Lady Justice.\n\n\n1901\nTerrible Teddy, the Grizzly King\nAmerican\nUnknown\n\nunknown\nhttps://en.wikipedia.org/wiki/Terrible_Teddy,_the_Grizzly_King\nLasting just 61 seconds and consisting of two shots, the first shot is set in a wood during winter. The actor representing then vice-president Theodore Roosevelt enthusiastically hurries down a hillside towards a tree in the foreground. He falls once, but rights himself and cocks his rifle. Two other men, bearing signs reading \"His Photographer\" and \"His Press Agent\" respectively, follow him into the shot; the photographer sets up his camera. \"Teddy\" aims his rifle upward at the tree and fells what appears to be a common house cat, which he then proceeds to stab. \"Teddy\" holds his prize aloft, and the press agent takes notes. The second shot is taken in a slightly different part of the wood, on a path. \"Teddy\" rides the path on his horse towards the camera and out to the left of the shot, followed closely by the press agent and photographer, still dutifully holding their signs.\n\n\n1902\nJack and the Beanstalk\nAmerican\nGeorge S. Fleming, Edwin S. Porter\n\nunknown\nhttps://en.wikipedia.org/wiki/Jack_and_the_Beanstalk_(1902_film)\nThe earliest known adaptation of the classic fairytale, this films shows Jack trading his cow for the beans, his mother forcing him to drop them in the front yard, and beig forced upstairs. As he sleeps, Jack is visited by a fairy who shows him glimpses of what will await him when he ascends the bean stalk. In this version, Jack is the son of a deposed king. When Jack wakes up, he finds the beanstalk has grown and he climbs to the top where he enters the giant's home. The giant finds Jack, who narrowly escapes. The giant chases Jack down the bean stalk, but Jack is able to cut it down before the giant can get to safety. 
He falls and is killed as Jack celebrates. The fairy then reveals that Jack may return home as a prince.\n\n\n1903\nAlice in Wonderland\nAmerican\nCecil Hepworth\nMay Clark\nunknown\nhttps://en.wikipedia.org/wiki/Alice_in_Wonderland_(1903_film)\nAlice follows a large white rabbit down a \"Rabbit-hole\". She finds a tiny door. When she finds a bottle labeled \"Drink me\", she does, and shrinks, but not enough to pass through the door. She then eats something labeled \"Eat me\" and grows larger. She finds a fan when enables her to shrink enough to get into the \"Garden\" and try to get a \"Dog\" to play with her. She enters the \"White Rabbit's tiny House,\" but suddenly resumes her normal size. In order to get out, she has to use the \"magic fan.\"\\r\\nShe enters a kitchen, in which there is a cook and a woman holding a baby. She persuades the woman to give her the child and takes the infant outside after the cook starts throwing things around. The baby then turns into a pig and squirms out of her grip. \"The Duchess's Cheshire Cat\" appears and disappears a couple of times to Alice and directs her to the Mad Hatter's \"Mad Tea-Party.\" After a while, she leaves.\\r\\nThe Queen invites Alice to join the \"ROYAL PROCESSION\": a parade of marching playing cards and others headed by the White Rabbit. When Alice \"unintentionally offends the Queen\", the latter summons the \"Executioner\". Alice \"boxes the ears\", then flees when all the playing cards come for her. Then she wakes up and realizes it was all a dream.\n\n\n1903\nThe Great Train Robbery\nAmerican\nEdwin S. Porter\n\nwestern\nhttps://en.wikipedia.org/wiki/The_Great_Train_Robbery_(1903_film)\nThe film opens with two bandits breaking into a railroad telegraph office, where they force the operator at gunpoint to have a train stopped and to transmit orders for the engineer to fill the locomotive's tender at the station's water tank. They then knock the operator out and tie him up. 
As the train stops it is boarded by the bandits‍—‌now four. Two bandits enter an express car, kill a messenger and open a box of valuables with dynamite; the others kill the fireman and force the engineer to halt the train and disconnect the locomotive. The bandits then force the passengers off the train and rifle them for their belongings. One passenger tries to escape but is instantly shot down. Carrying their loot, the bandits escape in the locomotive, later stopping in a valley where their horses had been left.\\r\\nMeanwhile, back in the telegraph office, the bound operator awakens, but he collapses again. His daughter arrives bringing him his meal and cuts him free, and restores him to consciousness by dousing him with water.\\r\\nThere is some comic relief at a dance hall, where an Eastern stranger is forced to dance while the locals fire at his feet. The door suddenly opens and the telegraph operator rushes in to tell them of the robbery. The men quickly form a posse, which overtakes the bandits, and in a final shootout kills them all and recovers the stolen mail.\n\n\n1904\nThe Suburbanite\nAmerican\nWallace McCutcheon\n\ncomedy\nhttps://en.wikipedia.org/wiki/The_Suburbanite\nThe film is about a family who move to the suburbs, hoping for a quiet life. Things start to go wrong, and the wife gets violent and starts throwing crockery, leading to her arrest.\n\n\n1905\nThe Little Train Robbery\nAmerican\nEdwin Stanton Porter\n\nunknown\nhttps://en.wikipedia.org/wiki/The_Little_Train_Robbery\nThe opening scene shows the interior of the robbers' den. The walls are decorated with the portraits of notorious criminals and pictures illustrating the exploits of famous bandits. Some of the gang are lounging about, while others are reading novels and illustrated papers. Although of youthful appearance, each is dressed like a typical Western desperado. The \"Bandit Queen,\" leading a blindfolded new recruit, now enters the room. 
He is led to the center of the room, raises his right hand and is solemnly sworn in. When the bandage is removed from his eyes he finds himself looking into the muzzles of a dozen or more 45's. The gang then congratulates the new member and heartily shake his hand. The \"Bandit Queen\" who is evidently the leader of the gang, now calls for volunteers to hold up a train. All respond, but she picks out seven for the job who immediately leave the cabin.\\r\\nThe next scene shows the gang breaking into a barn. They steal ponies and ride away. Upon reaching the place agreed upon they picket their ponies and leaving them in charge of a trusted member proceed to a wild mountain spot in a bend of the railroad, where the road runs over a steep embankment. The spot is an ideal one for holding up a train. Cross ties are now placed on the railroad track and the gang hide in some bushes close by and wait for the train. The train soon approaches and is brought to a stop. The engineer leaves his engine and proceeds to remove the obstruction on the track. While he is bending over one of the gang sneaks up behind them and hits him on the head with an axe, and knocks him senseless down the embankment, while the gang surround the train and hold up the passengers. After securing all the \"valuables,\" consisting principally of candy and dolls, the robbers uncouple the engine and one car and make their escape just in time to avoid a posse of police who appear on the scene. Further up the road they abandon the engine and car, take to the woods and soon reach their ponies.\\r\\nIn the meantime the police have learned the particulars of the hold-up from the frightened passengers and have started up the railroad tracks after the fleeing robbers. The robbers are next seen riding up the bed of a shallow stream and finally reach their den, where the remainder of the gang have been waiting for them. 
Believing they have successfully eluded their pursuers, they proceed to divide the \"plunder.\" The police, however, have struck the right trail and are in close pursuit. While the \"plunder\" is being divided a sentry gives the alarm and the entire gang, abandoning everything, rush from the cabin barely in time to escape capture. The police make a hurried search and again start in pursuit. The robbers are so hard pressed that they are unable to reach their ponies, and are obliged to take chances on foot. The police now get in sight of the fleeing robbers and a lively chase follows through tall weeds, over a bridge and up a steep hill. Reaching a pond the police are close on their heels. The foremost robbers jump in clothes and all and strike out for the opposite bank. Two hesitate and are captured. Boats are secured and after an exciting tussle the entire gang is rounded up. In the mix up one of the police is dragged overboard. The final scene shows the entire gang of bedraggled and crestfallen robbers tied together with a rope and being led away by the police. Two of the police are loaded down with revolvers, knives and cartridge belts, and resemble walking aresenals. As a fitting climax a confederate steals out of the woods, cuts the rope and gallantly rescues the \"Bandit Queen.\"\n\n\n1905\nThe Night Before Christmas\nAmerican\nEdwin Stanton Porter\n\nunknown\nhttps://en.wikipedia.org/wiki/The_Night_Before_Christmas_(1905_film)\nScenes are introduced using lines of the poem.[2] Santa Claus, played by Harry Eytinge, is shown feeding real reindeer[4] and finishes his work in the workshop. Meanwhile, the children of a city household hang their stockings and go to bed, but unable to sleep they engage in a pillow fight. Santa Claus leaves his home on a sleigh with his reindeer. He enters the children's house through the chimney, and leaves the presents. 
The children come down the stairs and enjoy their presents.\n\n\n1906\nDream of a Rarebit Fiend\nAmerican\nWallace McCutcheon and Edwin S. Porter\n\nshort\nhttps://en.wikipedia.org/wiki/Dream_of_a_Rarebit_Fiend_(1906_film)\nThe Rarebit Fiend gorges on Welsh rarebit at a restaurant. When he leaves, he begins to get dizzy as he starts to hallucinate. He desperately tries to hang onto a lamppost as the world spins all around him. A man helps him get home. He falls into bed and begins having more hallucinatory dreams. During a dream sequence, the furniture begins moving around the room. Imps emerge from a floating Welsh rarebit container and begin poking his head as he sleeps. His bed then begins dancing and spinning wildly around the room before flying out the window with the Fiend in it. The bed floats across the city as the Fiend floats up and off the bed. He hangs off the back and eventually gets caught on a weathervane atop a steeple. His bedclothes tear and he falls from the sky, crashing through his bedroom ceiling. The Fiend awakens from the dream after falling out of his bed.\n\n\n1906\nFrom Leadville to Aspen: A Hold-Up in the Rockies\nAmerican\nFrancis J. Marion and Wallace McCutcheon\n\nshort action/crime western\nhttps://en.wikipedia.org/wiki/From_Leadville_to_Aspen:_A_Hold-Up_in_the_Rockies\nThe film features a train traveling through the Rockies and a hold up created by two thugs placing logs on the line. They systematically rob the wealthy occupants at gunpoint and then make their getaway along the tracks and later by a hi-jacked horse and cart.\n\n\n1906\nKathleen Mavourneen\nAmerican\nEdwin S. Porter\n\nshort film\nhttps://en.wikipedia.org/wiki/Kathleen_Mavourneen_(1906_film)\nIrish villager Kathleen is a tenant of Captain Clearfield, who controls local judges and criminals. Her father owes Clearfield a large debt. 
Terence O'More saves the village from Clearfield, causing a large celebration.\\r\\nFilm historian Charles Musser writes of Porter's adaptation, \"O'More not only rescues Kathleen from the villain but, through marriage, renews the family for another generation.\"[1]\n\n\n1907\nDaniel Boone\nAmerican\nWallace McCutcheon and Ediwin S. Porter\nWilliam Craven, Florence Lawrence\nbiographical\nhttps://en.wikipedia.org/wiki/Daniel_Boone_(1907_film)\nBoone's daughter befriends an Indian maiden as Boone and his companion start out on a hunting expedition. While he is away, Boone's cabin is attacked by the Indians, who set it on fire and abduct Boone's daughter. Boone returns, swears vengeance, then heads out on the trail to the Indian camp. His daughter escapes but is chased. The Indians encounter Boone, which sets off a huge fight on the edge of a cliff. A burning arrow gets shot into the Indian camp. Boone gets tied to the stake and tortured. The burning arrow sets the Indian camp on fire, causing panic. Boone is rescued by his horse, and Boone has a knife fight in which he kills the Indian chief.[2]\n\n\n1907\nHow Brown Saw the Baseball Game\nAmerican\nUnknown\nUnknown\ncomedy\nhttps://en.wikipedia.org/wiki/How_Brown_Saw_the_Baseball_Game\nBefore heading out to a baseball game at a nearby ballpark, sports fan Mr. Brown drinks several highball cocktails. He arrives at the ballpark to watch the game, but has become so inebriated that the game appears to him in reverse, with the players running the bases backwards and the baseball flying back into the pitcher's hand. After the game is over, Mr. Brown is escorted home by one of his friends. 
When they arrive at Brown's house, they encounter his wife who becomes furious with the friend and proceeds to physically assault him, believing he is responsible for her husband's severe intoxication.[1]\n\n\n1907\nLaughing Gas\nAmerican\nEdwin Stanton Porter\nBertha Regustus, Edward Boulden\ncomedy\nhttps://en.wikipedia.org/wiki/Laughing_Gas_(film)#1907_Film\nThe plot is that of a black woman going to the dentist for a toothache and being given laughing gas. On her way walking home, and in other situations, she can't stop laughing, and everyone she meets \"catches\" the laughter from her, including a vendor and police officers.\n\n\n1908\nThe Adventures of Dollie\nAmerican\nD. W. Griffith\nArthur V. Johnson, Linda Arvidson\ndrama\nhttps://en.wikipedia.org/wiki/The_Adventures_of_Dollie\nOn a beautiful summer day a father and mother take their daughter Dollie on an outing to the river. The mother refuses to buy a gypsy's wares. The gypsy tries to rob the mother, but the father drives him off. The gypsy returns to the camp and devises a plan. They return and kidnap Dollie while her parents are distracted. A rescue crew is organized, but the gypsy takes Dollie to his camp. They gag Dollie and hide her in a barrel before the rescue party gets to the camp. Once they leave the gypsies and escapes in their wagon. As the wagon crosses the river, the barrel falls into the water. Still sealed in the barrel, Dollie is swept downstream in dangerous currents. A boy who is fishing in the river finds the barrel, and Dollie is reunited safely with her parents.\n\n\n1908\nThe Black Viper\nAmerican\nD. W. Griffith\nD. W. Griffith\ndrama\nhttps://en.wikipedia.org/wiki/The_Black_Viper\nA thug accosts a girl as she leaves her workplace but a man rescues her. The thug vows revenge and, with the help of two friends, attacks the girl and her rescuer again as they're going for a walk. This time they succeed in kidnapping the rescuer. He is bound and gagged and taken away in a cart. 
The girl runs home and gets help from several neighbors. They track the ruffians down to a cabin in the mountains where the gang has trapped their victim and set the cabin on fire. A thug and Rescuer fight on the roof of the house.\n\n\n1908\nA Calamitous Elopement\nAmerican\nD.W. Griffith\nHarry Solter, Linda Arvidson\ncomedy\nhttps://en.wikipedia.org/wiki/A_Calamitous_Elopement\nA young couple decides to elope after being caught in the midst of a romantic moment by the woman's angry father. They make plans to leave, but a thief discovers their plans and hides in their trunk and waits for the right moment to steal their belongings.\n\n\n1908\nThe Call of the Wild\nAmerican\nD. W. Griffith\nCharles Inslee\nadventure\nhttps://en.wikipedia.org/wiki/The_Call_of_the_Wild_(1908_film)\nA white girl (Florence Lawrence) rejects a proposal from an Indian brave (Charles Inslee) in this early one-reel Western melodrama. Despite the rejection, the Indian still comes to the girl's defense when she is abducted by his warring tribe. In her first year in films, Florence Lawrence was already the most popular among the Biograph Company's anonymous stock company players. By 1909, she was known the world over as \"The Biograph Girl.\"\n\n\n1908\nA Christmas Carol\nAmerican\nUnknown\nTom Ricketts\ndrama\nhttps://en.wikipedia.org/wiki/A_Christmas_Carol_(1908_film)\nNo prints of the first American film adaptation of A Christmas Carol are known to exist,[1] but The Moving Picture World magazine provided a scene-by-scene description before the film's release.[2] Scrooge goes into his office and begins working. His nephew, along with three women who wish for Scrooge to donate enter. However, Scrooge dismisses them. On the night of Christmas Eve, his long-dead partner Jacob Marley comes as a ghost, warning him of a horrible fate if he does not change his ways. 
Scrooge meets three spirits that show Scrooge the real meaning of Christmas, along with his grave, the result of his parsimonious ways. The next morning, he wakes and realizes the error of his ways. Scrooge was then euphoric and generous for the rest of his life.\n\n\n1908\nThe Fight for Freedom\nAmerican\nD. W. Griffith\nFlorence Auer, John G. Adolfi\nwestern\nhttps://en.wikipedia.org/wiki/The_Fight_for_Freedom\nThe film opens in a town on the Mexican border. A poker game is going on in the local saloon. One of the players cheats and is shot dead by another of the players, a Mexican named Pedro. In the uproar that follows Pedro is wounded as he escapes from the saloon. The sheriff is called, who tracks Pedro to his home but Pedro kills the sherriff too. While Pedro hides, his wife Juanita, is arrested on suspicion of murdering the sheriff. Pedro rescues her from the town jail and the two head for the Mexican border. Caught by the posse before they reach the border, Juanita is killed and the film ends with Pedro being arrested and taken back to town.\n\n\n", - "type": "Table" - } -] \ No newline at end of file diff --git a/test_unstructured_ingest/expected-structured-output/s3-minio/wiki_movie_plots_small.json b/test_unstructured_ingest/expected-structured-output/s3-minio/wiki_movie_plots_small.json new file mode 100644 index 0000000000..0de1593f28 --- /dev/null +++ b/test_unstructured_ingest/expected-structured-output/s3-minio/wiki_movie_plots_small.json @@ -0,0 +1,22 @@ +[ + { + "type": "Table", + "element_id": "249f1ecef0ea42c3b2b663b3e409879c", + "text": "\n\n\nRelease Year\nTitle\nOrigin/Ethnicity\nDirector\nCast\nGenre\nWiki Page\nPlot\n\n\n1901\nKansas Saloon Smashers\nAmerican\nUnknown\n\nunknown\nhttps://en.wikipedia.org/wiki/Kansas_Saloon_Smashers\nA bartender is working at a saloon, serving drinks to customers. After he fills a stereotypically Irish man's bucket with beer, Carrie Nation and her followers burst inside. 
They assault the Irish man, pulling his hat over his eyes and then dumping the beer over his head. The group then begin wrecking the bar, smashing the fixtures, mirrors, and breaking the cash register. The bartender then sprays seltzer water in Nation's face before a group of policemen appear and order everybody to leave.[1]\n\n\n1901\nLove by the Light of the Moon\nAmerican\nUnknown\n\nunknown\nhttps://en.wikipedia.org/wiki/Love_by_the_Light_of_the_Moon\nThe moon, painted with a smiling face hangs over a park at night. A young couple walking past a fence learn on a railing and look up. The moon smiles. They embrace, and the moon's smile gets bigger. They then sit down on a bench by a tree. The moon's view is blocked, causing him to frown. In the last scene, the man fans the woman with his hat because the moon has left the sky and is perched over her shoulder to see everything better.\n\n\n1901\nThe Martyred Presidents\nAmerican\nUnknown\n\nunknown\nhttps://en.wikipedia.org/wiki/The_Martyred_Presidents\nThe film, just over a minute long, is composed of two shots. In the first, a girl sits at the base of an altar or tomb, her face hidden from the camera. At the center of the altar, a viewing portal displays the portraits of three U.S. Presidents\u2014Abraham Lincoln, James A. Garfield, and William McKinley\u2014each victims of assassination.\\r\\nIn the second shot, which runs just over eight seconds long, an assassin kneels feet of Lady Justice.\n\n\n1901\nTerrible Teddy, the Grizzly King\nAmerican\nUnknown\n\nunknown\nhttps://en.wikipedia.org/wiki/Terrible_Teddy,_the_Grizzly_King\nLasting just 61 seconds and consisting of two shots, the first shot is set in a wood during winter. The actor representing then vice-president Theodore Roosevelt enthusiastically hurries down a hillside towards a tree in the foreground. He falls once, but rights himself and cocks his rifle. 
Two other men, bearing signs reading \"His Photographer\" and \"His Press Agent\" respectively, follow him into the shot; the photographer sets up his camera. \"Teddy\" aims his rifle upward at the tree and fells what appears to be a common house cat, which he then proceeds to stab. \"Teddy\" holds his prize aloft, and the press agent takes notes. The second shot is taken in a slightly different part of the wood, on a path. \"Teddy\" rides the path on his horse towards the camera and out to the left of the shot, followed closely by the press agent and photographer, still dutifully holding their signs.\n\n\n1902\nJack and the Beanstalk\nAmerican\nGeorge S. Fleming, Edwin S. Porter\n\nunknown\nhttps://en.wikipedia.org/wiki/Jack_and_the_Beanstalk_(1902_film)\nThe earliest known adaptation of the classic fairytale, this films shows Jack trading his cow for the beans, his mother forcing him to drop them in the front yard, and beig forced upstairs. As he sleeps, Jack is visited by a fairy who shows him glimpses of what will await him when he ascends the bean stalk. In this version, Jack is the son of a deposed king. When Jack wakes up, he finds the beanstalk has grown and he climbs to the top where he enters the giant's home. The giant finds Jack, who narrowly escapes. The giant chases Jack down the bean stalk, but Jack is able to cut it down before the giant can get to safety. He falls and is killed as Jack celebrates. The fairy then reveals that Jack may return home as a prince.\n\n\n1903\nAlice in Wonderland\nAmerican\nCecil Hepworth\nMay Clark\nunknown\nhttps://en.wikipedia.org/wiki/Alice_in_Wonderland_(1903_film)\nAlice follows a large white rabbit down a \"Rabbit-hole\". She finds a tiny door. When she finds a bottle labeled \"Drink me\", she does, and shrinks, but not enough to pass through the door. She then eats something labeled \"Eat me\" and grows larger. 
She finds a fan when enables her to shrink enough to get into the \"Garden\" and try to get a \"Dog\" to play with her. She enters the \"White Rabbit's tiny House,\" but suddenly resumes her normal size. In order to get out, she has to use the \"magic fan.\"\\r\\nShe enters a kitchen, in which there is a cook and a woman holding a baby. She persuades the woman to give her the child and takes the infant outside after the cook starts throwing things around. The baby then turns into a pig and squirms out of her grip. \"The Duchess's Cheshire Cat\" appears and disappears a couple of times to Alice and directs her to the Mad Hatter's \"Mad Tea-Party.\" After a while, she leaves.\\r\\nThe Queen invites Alice to join the \"ROYAL PROCESSION\": a parade of marching playing cards and others headed by the White Rabbit. When Alice \"unintentionally offends the Queen\", the latter summons the \"Executioner\". Alice \"boxes the ears\", then flees when all the playing cards come for her. Then she wakes up and realizes it was all a dream.\n\n\n1903\nThe Great Train Robbery\nAmerican\nEdwin S. Porter\n\nwestern\nhttps://en.wikipedia.org/wiki/The_Great_Train_Robbery_(1903_film)\nThe film opens with two bandits breaking into a railroad telegraph office, where they force the operator at gunpoint to have a train stopped and to transmit orders for the engineer to fill the locomotive's tender at the station's water tank. They then knock the operator out and tie him up. As the train stops it is boarded by the bandits\u200d\u2014\u200cnow four. Two bandits enter an express car, kill a messenger and open a box of valuables with dynamite; the others kill the fireman and force the engineer to halt the train and disconnect the locomotive. The bandits then force the passengers off the train and rifle them for their belongings. One passenger tries to escape but is instantly shot down. 
Carrying their loot, the bandits escape in the locomotive, later stopping in a valley where their horses had been left.\\r\\nMeanwhile, back in the telegraph office, the bound operator awakens, but he collapses again. His daughter arrives bringing him his meal and cuts him free, and restores him to consciousness by dousing him with water.\\r\\nThere is some comic relief at a dance hall, where an Eastern stranger is forced to dance while the locals fire at his feet. The door suddenly opens and the telegraph operator rushes in to tell them of the robbery. The men quickly form a posse, which overtakes the bandits, and in a final shootout kills them all and recovers the stolen mail.\n\n\n1904\nThe Suburbanite\nAmerican\nWallace McCutcheon\n\ncomedy\nhttps://en.wikipedia.org/wiki/The_Suburbanite\nThe film is about a family who move to the suburbs, hoping for a quiet life. Things start to go wrong, and the wife gets violent and starts throwing crockery, leading to her arrest.\n\n\n1905\nThe Little Train Robbery\nAmerican\nEdwin Stanton Porter\n\nunknown\nhttps://en.wikipedia.org/wiki/The_Little_Train_Robbery\nThe opening scene shows the interior of the robbers' den. The walls are decorated with the portraits of notorious criminals and pictures illustrating the exploits of famous bandits. Some of the gang are lounging about, while others are reading novels and illustrated papers. Although of youthful appearance, each is dressed like a typical Western desperado. The \"Bandit Queen,\" leading a blindfolded new recruit, now enters the room. He is led to the center of the room, raises his right hand and is solemnly sworn in. When the bandage is removed from his eyes he finds himself looking into the muzzles of a dozen or more 45's. The gang then congratulates the new member and heartily shake his hand. The \"Bandit Queen\" who is evidently the leader of the gang, now calls for volunteers to hold up a train. 
All respond, but she picks out seven for the job who immediately leave the cabin.\\r\\nThe next scene shows the gang breaking into a barn. They steal ponies and ride away. Upon reaching the place agreed upon they picket their ponies and leaving them in charge of a trusted member proceed to a wild mountain spot in a bend of the railroad, where the road runs over a steep embankment. The spot is an ideal one for holding up a train. Cross ties are now placed on the railroad track and the gang hide in some bushes close by and wait for the train. The train soon approaches and is brought to a stop. The engineer leaves his engine and proceeds to remove the obstruction on the track. While he is bending over one of the gang sneaks up behind them and hits him on the head with an axe, and knocks him senseless down the embankment, while the gang surround the train and hold up the passengers. After securing all the \"valuables,\" consisting principally of candy and dolls, the robbers uncouple the engine and one car and make their escape just in time to avoid a posse of police who appear on the scene. Further up the road they abandon the engine and car, take to the woods and soon reach their ponies.\\r\\nIn the meantime the police have learned the particulars of the hold-up from the frightened passengers and have started up the railroad tracks after the fleeing robbers. The robbers are next seen riding up the bed of a shallow stream and finally reach their den, where the remainder of the gang have been waiting for them. Believing they have successfully eluded their pursuers, they proceed to divide the \"plunder.\" The police, however, have struck the right trail and are in close pursuit. While the \"plunder\" is being divided a sentry gives the alarm and the entire gang, abandoning everything, rush from the cabin barely in time to escape capture. The police make a hurried search and again start in pursuit. 
The robbers are so hard pressed that they are unable to reach their ponies, and are obliged to take chances on foot. The police now get in sight of the fleeing robbers and a lively chase follows through tall weeds, over a bridge and up a steep hill. Reaching a pond the police are close on their heels. The foremost robbers jump in clothes and all and strike out for the opposite bank. Two hesitate and are captured. Boats are secured and after an exciting tussle the entire gang is rounded up. In the mix up one of the police is dragged overboard. The final scene shows the entire gang of bedraggled and crestfallen robbers tied together with a rope and being led away by the police. Two of the police are loaded down with revolvers, knives and cartridge belts, and resemble walking aresenals. As a fitting climax a confederate steals out of the woods, cuts the rope and gallantly rescues the \"Bandit Queen.\"\n\n\n1905\nThe Night Before Christmas\nAmerican\nEdwin Stanton Porter\n\nunknown\nhttps://en.wikipedia.org/wiki/The_Night_Before_Christmas_(1905_film)\nScenes are introduced using lines of the poem.[2] Santa Claus, played by Harry Eytinge, is shown feeding real reindeer[4] and finishes his work in the workshop. Meanwhile, the children of a city household hang their stockings and go to bed, but unable to sleep they engage in a pillow fight. Santa Claus leaves his home on a sleigh with his reindeer. He enters the children's house through the chimney, and leaves the presents. The children come down the stairs and enjoy their presents.\n\n\n1906\nDream of a Rarebit Fiend\nAmerican\nWallace McCutcheon and Edwin S. Porter\n\nshort\nhttps://en.wikipedia.org/wiki/Dream_of_a_Rarebit_Fiend_(1906_film)\nThe Rarebit Fiend gorges on Welsh rarebit at a restaurant. When he leaves, he begins to get dizzy as he starts to hallucinate. He desperately tries to hang onto a lamppost as the world spins all around him. A man helps him get home. 
He falls into bed and begins having more hallucinatory dreams. During a dream sequence, the furniture begins moving around the room. Imps emerge from a floating Welsh rarebit container and begin poking his head as he sleeps. His bed then begins dancing and spinning wildly around the room before flying out the window with the Fiend in it. The bed floats across the city as the Fiend floats up and off the bed. He hangs off the back and eventually gets caught on a weathervane atop a steeple. His bedclothes tear and he falls from the sky, crashing through his bedroom ceiling. The Fiend awakens from the dream after falling out of his bed.\n\n\n1906\nFrom Leadville to Aspen: A Hold-Up in the Rockies\nAmerican\nFrancis J. Marion and Wallace McCutcheon\n\nshort action/crime western\nhttps://en.wikipedia.org/wiki/From_Leadville_to_Aspen:_A_Hold-Up_in_the_Rockies\nThe film features a train traveling through the Rockies and a hold up created by two thugs placing logs on the line. They systematically rob the wealthy occupants at gunpoint and then make their getaway along the tracks and later by a hi-jacked horse and cart.\n\n\n1906\nKathleen Mavourneen\nAmerican\nEdwin S. Porter\n\nshort film\nhttps://en.wikipedia.org/wiki/Kathleen_Mavourneen_(1906_film)\nIrish villager Kathleen is a tenant of Captain Clearfield, who controls local judges and criminals. Her father owes Clearfield a large debt. Terence O'More saves the village from Clearfield, causing a large celebration.\\r\\nFilm historian Charles Musser writes of Porter's adaptation, \"O'More not only rescues Kathleen from the villain but, through marriage, renews the family for another generation.\"[1]\n\n\n1907\nDaniel Boone\nAmerican\nWallace McCutcheon and Ediwin S. Porter\nWilliam Craven, Florence Lawrence\nbiographical\nhttps://en.wikipedia.org/wiki/Daniel_Boone_(1907_film)\nBoone's daughter befriends an Indian maiden as Boone and his companion start out on a hunting expedition. 
While he is away, Boone's cabin is attacked by the Indians, who set it on fire and abduct Boone's daughter. Boone returns, swears vengeance, then heads out on the trail to the Indian camp. His daughter escapes but is chased. The Indians encounter Boone, which sets off a huge fight on the edge of a cliff. A burning arrow gets shot into the Indian camp. Boone gets tied to the stake and tortured. The burning arrow sets the Indian camp on fire, causing panic. Boone is rescued by his horse, and Boone has a knife fight in which he kills the Indian chief.[2]\n\n\n1907\nHow Brown Saw the Baseball Game\nAmerican\nUnknown\nUnknown\ncomedy\nhttps://en.wikipedia.org/wiki/How_Brown_Saw_the_Baseball_Game\nBefore heading out to a baseball game at a nearby ballpark, sports fan Mr. Brown drinks several highball cocktails. He arrives at the ballpark to watch the game, but has become so inebriated that the game appears to him in reverse, with the players running the bases backwards and the baseball flying back into the pitcher's hand. After the game is over, Mr. Brown is escorted home by one of his friends. When they arrive at Brown's house, they encounter his wife who becomes furious with the friend and proceeds to physically assault him, believing he is responsible for her husband's severe intoxication.[1]\n\n\n1907\nLaughing Gas\nAmerican\nEdwin Stanton Porter\nBertha Regustus, Edward Boulden\ncomedy\nhttps://en.wikipedia.org/wiki/Laughing_Gas_(film)#1907_Film\nThe plot is that of a black woman going to the dentist for a toothache and being given laughing gas. On her way walking home, and in other situations, she can't stop laughing, and everyone she meets \"catches\" the laughter from her, including a vendor and police officers.\n\n\n1908\nThe Adventures of Dollie\nAmerican\nD. W. Griffith\nArthur V. 
Johnson, Linda Arvidson\ndrama\nhttps://en.wikipedia.org/wiki/The_Adventures_of_Dollie\nOn a beautiful summer day a father and mother take their daughter Dollie on an outing to the river. The mother refuses to buy a gypsy's wares. The gypsy tries to rob the mother, but the father drives him off. The gypsy returns to the camp and devises a plan. They return and kidnap Dollie while her parents are distracted. A rescue crew is organized, but the gypsy takes Dollie to his camp. They gag Dollie and hide her in a barrel before the rescue party gets to the camp. Once they leave the gypsies and escapes in their wagon. As the wagon crosses the river, the barrel falls into the water. Still sealed in the barrel, Dollie is swept downstream in dangerous currents. A boy who is fishing in the river finds the barrel, and Dollie is reunited safely with her parents.\n\n\n1908\nThe Black Viper\nAmerican\nD. W. Griffith\nD. W. Griffith\ndrama\nhttps://en.wikipedia.org/wiki/The_Black_Viper\nA thug accosts a girl as she leaves her workplace but a man rescues her. The thug vows revenge and, with the help of two friends, attacks the girl and her rescuer again as they're going for a walk. This time they succeed in kidnapping the rescuer. He is bound and gagged and taken away in a cart. The girl runs home and gets help from several neighbors. They track the ruffians down to a cabin in the mountains where the gang has trapped their victim and set the cabin on fire. A thug and Rescuer fight on the roof of the house.\n\n\n1908\nA Calamitous Elopement\nAmerican\nD.W. Griffith\nHarry Solter, Linda Arvidson\ncomedy\nhttps://en.wikipedia.org/wiki/A_Calamitous_Elopement\nA young couple decides to elope after being caught in the midst of a romantic moment by the woman's angry father. They make plans to leave, but a thief discovers their plans and hides in their trunk and waits for the right moment to steal their belongings.\n\n\n1908\nThe Call of the Wild\nAmerican\nD. W. 
Griffith\nCharles Inslee\nadventure\nhttps://en.wikipedia.org/wiki/The_Call_of_the_Wild_(1908_film)\nA white girl (Florence Lawrence) rejects a proposal from an Indian brave (Charles Inslee) in this early one-reel Western melodrama. Despite the rejection, the Indian still comes to the girl's defense when she is abducted by his warring tribe. In her first year in films, Florence Lawrence was already the most popular among the Biograph Company's anonymous stock company players. By 1909, she was known the world over as \"The Biograph Girl.\"\n\n\n1908\nA Christmas Carol\nAmerican\nUnknown\nTom Ricketts\ndrama\nhttps://en.wikipedia.org/wiki/A_Christmas_Carol_(1908_film)\nNo prints of the first American film adaptation of A Christmas Carol are known to exist,[1] but The Moving Picture World magazine provided a scene-by-scene description before the film's release.[2] Scrooge goes into his office and begins working. His nephew, along with three women who wish for Scrooge to donate enter. However, Scrooge dismisses them. On the night of Christmas Eve, his long-dead partner Jacob Marley comes as a ghost, warning him of a horrible fate if he does not change his ways. Scrooge meets three spirits that show Scrooge the real meaning of Christmas, along with his grave, the result of his parsimonious ways. The next morning, he wakes and realizes the error of his ways. Scrooge was then euphoric and generous for the rest of his life.\n\n\n1908\nThe Fight for Freedom\nAmerican\nD. W. Griffith\nFlorence Auer, John G. Adolfi\nwestern\nhttps://en.wikipedia.org/wiki/The_Fight_for_Freedom\nThe film opens in a town on the Mexican border. A poker game is going on in the local saloon. One of the players cheats and is shot dead by another of the players, a Mexican named Pedro. In the uproar that follows Pedro is wounded as he escapes from the saloon. The sheriff is called, who tracks Pedro to his home but Pedro kills the sherriff too. 
While Pedro hides, his wife Juanita, is arrested on suspicion of murdering the sheriff. Pedro rescues her from the town jail and the two head for the Mexican border. Caught by the posse before they reach the border, Juanita is killed and the film ends with Pedro being arrested and taken back to town.\n\n\n", + "metadata": { + "text_as_html": "\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Release YearTitleOrigin/EthnicityDirectorCastGenreWiki PagePlot
1901Kansas Saloon SmashersAmericanUnknownunknownhttps://en.wikipedia.org/wiki/Kansas_Saloon_SmashersA bartender is working at a saloon, serving drinks to customers. After he fills a stereotypically Irish man's bucket with beer, Carrie Nation and her followers burst inside. They assault the Irish man, pulling his hat over his eyes and then dumping the beer over his head. The group then begin wrecking the bar, smashing the fixtures, mirrors, and breaking the cash register. The bartender then sprays seltzer water in Nation's face before a group of policemen appear and order everybody to leave.[1]
1901Love by the Light of the MoonAmericanUnknownunknownhttps://en.wikipedia.org/wiki/Love_by_the_Light_of_the_MoonThe moon, painted with a smiling face hangs over a park at night. A young couple walking past a fence learn on a railing and look up. The moon smiles. They embrace, and the moon's smile gets bigger. They then sit down on a bench by a tree. The moon's view is blocked, causing him to frown. In the last scene, the man fans the woman with his hat because the moon has left the sky and is perched over her shoulder to see everything better.
1901The Martyred PresidentsAmericanUnknownunknownhttps://en.wikipedia.org/wiki/The_Martyred_PresidentsThe film, just over a minute long, is composed of two shots. In the first, a girl sits at the base of an altar or tomb, her face hidden from the camera. At the center of the altar, a viewing portal displays the portraits of three U.S. Presidents\u2014Abraham Lincoln, James A. Garfield, and William McKinley\u2014each victims of assassination.\\r\\nIn the second shot, which runs just over eight seconds long, an assassin kneels feet of Lady Justice.
1901Terrible Teddy, the Grizzly KingAmericanUnknownunknownhttps://en.wikipedia.org/wiki/Terrible_Teddy,_the_Grizzly_KingLasting just 61 seconds and consisting of two shots, the first shot is set in a wood during winter. The actor representing then vice-president Theodore Roosevelt enthusiastically hurries down a hillside towards a tree in the foreground. He falls once, but rights himself and cocks his rifle. Two other men, bearing signs reading \"His Photographer\" and \"His Press Agent\" respectively, follow him into the shot; the photographer sets up his camera. \"Teddy\" aims his rifle upward at the tree and fells what appears to be a common house cat, which he then proceeds to stab. \"Teddy\" holds his prize aloft, and the press agent takes notes. The second shot is taken in a slightly different part of the wood, on a path. \"Teddy\" rides the path on his horse towards the camera and out to the left of the shot, followed closely by the press agent and photographer, still dutifully holding their signs.
1902Jack and the BeanstalkAmericanGeorge S. Fleming, Edwin S. Porterunknownhttps://en.wikipedia.org/wiki/Jack_and_the_Beanstalk_(1902_film)The earliest known adaptation of the classic fairytale, this films shows Jack trading his cow for the beans, his mother forcing him to drop them in the front yard, and beig forced upstairs. As he sleeps, Jack is visited by a fairy who shows him glimpses of what will await him when he ascends the bean stalk. In this version, Jack is the son of a deposed king. When Jack wakes up, he finds the beanstalk has grown and he climbs to the top where he enters the giant's home. The giant finds Jack, who narrowly escapes. The giant chases Jack down the bean stalk, but Jack is able to cut it down before the giant can get to safety. He falls and is killed as Jack celebrates. The fairy then reveals that Jack may return home as a prince.
1903Alice in WonderlandAmericanCecil HepworthMay Clarkunknownhttps://en.wikipedia.org/wiki/Alice_in_Wonderland_(1903_film)Alice follows a large white rabbit down a \"Rabbit-hole\". She finds a tiny door. When she finds a bottle labeled \"Drink me\", she does, and shrinks, but not enough to pass through the door. She then eats something labeled \"Eat me\" and grows larger. She finds a fan when enables her to shrink enough to get into the \"Garden\" and try to get a \"Dog\" to play with her. She enters the \"White Rabbit's tiny House,\" but suddenly resumes her normal size. In order to get out, she has to use the \"magic fan.\"\\r\\nShe enters a kitchen, in which there is a cook and a woman holding a baby. She persuades the woman to give her the child and takes the infant outside after the cook starts throwing things around. The baby then turns into a pig and squirms out of her grip. \"The Duchess's Cheshire Cat\" appears and disappears a couple of times to Alice and directs her to the Mad Hatter's \"Mad Tea-Party.\" After a while, she leaves.\\r\\nThe Queen invites Alice to join the \"ROYAL PROCESSION\": a parade of marching playing cards and others headed by the White Rabbit. When Alice \"unintentionally offends the Queen\", the latter summons the \"Executioner\". Alice \"boxes the ears\", then flees when all the playing cards come for her. Then she wakes up and realizes it was all a dream.
1903The Great Train RobberyAmericanEdwin S. Porterwesternhttps://en.wikipedia.org/wiki/The_Great_Train_Robbery_(1903_film)The film opens with two bandits breaking into a railroad telegraph office, where they force the operator at gunpoint to have a train stopped and to transmit orders for the engineer to fill the locomotive's tender at the station's water tank. They then knock the operator out and tie him up. As the train stops it is boarded by the bandits\u200d\u2014\u200cnow four. Two bandits enter an express car, kill a messenger and open a box of valuables with dynamite; the others kill the fireman and force the engineer to halt the train and disconnect the locomotive. The bandits then force the passengers off the train and rifle them for their belongings. One passenger tries to escape but is instantly shot down. Carrying their loot, the bandits escape in the locomotive, later stopping in a valley where their horses had been left.\\r\\nMeanwhile, back in the telegraph office, the bound operator awakens, but he collapses again. His daughter arrives bringing him his meal and cuts him free, and restores him to consciousness by dousing him with water.\\r\\nThere is some comic relief at a dance hall, where an Eastern stranger is forced to dance while the locals fire at his feet. The door suddenly opens and the telegraph operator rushes in to tell them of the robbery. The men quickly form a posse, which overtakes the bandits, and in a final shootout kills them all and recovers the stolen mail.
1904The SuburbaniteAmericanWallace McCutcheoncomedyhttps://en.wikipedia.org/wiki/The_SuburbaniteThe film is about a family who move to the suburbs, hoping for a quiet life. Things start to go wrong, and the wife gets violent and starts throwing crockery, leading to her arrest.
1905The Little Train RobberyAmericanEdwin Stanton Porterunknownhttps://en.wikipedia.org/wiki/The_Little_Train_RobberyThe opening scene shows the interior of the robbers' den. The walls are decorated with the portraits of notorious criminals and pictures illustrating the exploits of famous bandits. Some of the gang are lounging about, while others are reading novels and illustrated papers. Although of youthful appearance, each is dressed like a typical Western desperado. The \"Bandit Queen,\" leading a blindfolded new recruit, now enters the room. He is led to the center of the room, raises his right hand and is solemnly sworn in. When the bandage is removed from his eyes he finds himself looking into the muzzles of a dozen or more 45's. The gang then congratulates the new member and heartily shake his hand. The \"Bandit Queen\" who is evidently the leader of the gang, now calls for volunteers to hold up a train. All respond, but she picks out seven for the job who immediately leave the cabin.\\r\\nThe next scene shows the gang breaking into a barn. They steal ponies and ride away. Upon reaching the place agreed upon they picket their ponies and leaving them in charge of a trusted member proceed to a wild mountain spot in a bend of the railroad, where the road runs over a steep embankment. The spot is an ideal one for holding up a train. Cross ties are now placed on the railroad track and the gang hide in some bushes close by and wait for the train. The train soon approaches and is brought to a stop. The engineer leaves his engine and proceeds to remove the obstruction on the track. While he is bending over one of the gang sneaks up behind them and hits him on the head with an axe, and knocks him senseless down the embankment, while the gang surround the train and hold up the passengers. 
After securing all the \"valuables,\" consisting principally of candy and dolls, the robbers uncouple the engine and one car and make their escape just in time to avoid a posse of police who appear on the scene. Further up the road they abandon the engine and car, take to the woods and soon reach their ponies.\\r\\nIn the meantime the police have learned the particulars of the hold-up from the frightened passengers and have started up the railroad tracks after the fleeing robbers. The robbers are next seen riding up the bed of a shallow stream and finally reach their den, where the remainder of the gang have been waiting for them. Believing they have successfully eluded their pursuers, they proceed to divide the \"plunder.\" The police, however, have struck the right trail and are in close pursuit. While the \"plunder\" is being divided a sentry gives the alarm and the entire gang, abandoning everything, rush from the cabin barely in time to escape capture. The police make a hurried search and again start in pursuit. The robbers are so hard pressed that they are unable to reach their ponies, and are obliged to take chances on foot. The police now get in sight of the fleeing robbers and a lively chase follows through tall weeds, over a bridge and up a steep hill. Reaching a pond the police are close on their heels. The foremost robbers jump in clothes and all and strike out for the opposite bank. Two hesitate and are captured. Boats are secured and after an exciting tussle the entire gang is rounded up. In the mix up one of the police is dragged overboard. The final scene shows the entire gang of bedraggled and crestfallen robbers tied together with a rope and being led away by the police. Two of the police are loaded down with revolvers, knives and cartridge belts, and resemble walking aresenals. As a fitting climax a confederate steals out of the woods, cuts the rope and gallantly rescues the \"Bandit Queen.\"
1905The Night Before ChristmasAmericanEdwin Stanton Porterunknownhttps://en.wikipedia.org/wiki/The_Night_Before_Christmas_(1905_film)Scenes are introduced using lines of the poem.[2] Santa Claus, played by Harry Eytinge, is shown feeding real reindeer[4] and finishes his work in the workshop. Meanwhile, the children of a city household hang their stockings and go to bed, but unable to sleep they engage in a pillow fight. Santa Claus leaves his home on a sleigh with his reindeer. He enters the children's house through the chimney, and leaves the presents. The children come down the stairs and enjoy their presents.
1906Dream of a Rarebit FiendAmericanWallace McCutcheon and Edwin S. Portershorthttps://en.wikipedia.org/wiki/Dream_of_a_Rarebit_Fiend_(1906_film)The Rarebit Fiend gorges on Welsh rarebit at a restaurant. When he leaves, he begins to get dizzy as he starts to hallucinate. He desperately tries to hang onto a lamppost as the world spins all around him. A man helps him get home. He falls into bed and begins having more hallucinatory dreams. During a dream sequence, the furniture begins moving around the room. Imps emerge from a floating Welsh rarebit container and begin poking his head as he sleeps. His bed then begins dancing and spinning wildly around the room before flying out the window with the Fiend in it. The bed floats across the city as the Fiend floats up and off the bed. He hangs off the back and eventually gets caught on a weathervane atop a steeple. His bedclothes tear and he falls from the sky, crashing through his bedroom ceiling. The Fiend awakens from the dream after falling out of his bed.
1906From Leadville to Aspen: A Hold-Up in the RockiesAmericanFrancis J. Marion and Wallace McCutcheonshort action/crime westernhttps://en.wikipedia.org/wiki/From_Leadville_to_Aspen:_A_Hold-Up_in_the_RockiesThe film features a train traveling through the Rockies and a hold up created by two thugs placing logs on the line. They systematically rob the wealthy occupants at gunpoint and then make their getaway along the tracks and later by a hi-jacked horse and cart.
1906Kathleen MavourneenAmericanEdwin S. Portershort filmhttps://en.wikipedia.org/wiki/Kathleen_Mavourneen_(1906_film)Irish villager Kathleen is a tenant of Captain Clearfield, who controls local judges and criminals. Her father owes Clearfield a large debt. Terence O'More saves the village from Clearfield, causing a large celebration.\\r\\nFilm historian Charles Musser writes of Porter's adaptation, \"O'More not only rescues Kathleen from the villain but, through marriage, renews the family for another generation.\"[1]
1907Daniel BooneAmericanWallace McCutcheon and Ediwin S. PorterWilliam Craven, Florence Lawrencebiographicalhttps://en.wikipedia.org/wiki/Daniel_Boone_(1907_film)Boone's daughter befriends an Indian maiden as Boone and his companion start out on a hunting expedition. While he is away, Boone's cabin is attacked by the Indians, who set it on fire and abduct Boone's daughter. Boone returns, swears vengeance, then heads out on the trail to the Indian camp. His daughter escapes but is chased. The Indians encounter Boone, which sets off a huge fight on the edge of a cliff. A burning arrow gets shot into the Indian camp. Boone gets tied to the stake and tortured. The burning arrow sets the Indian camp on fire, causing panic. Boone is rescued by his horse, and Boone has a knife fight in which he kills the Indian chief.[2]
1907How Brown Saw the Baseball GameAmericanUnknownUnknowncomedyhttps://en.wikipedia.org/wiki/How_Brown_Saw_the_Baseball_GameBefore heading out to a baseball game at a nearby ballpark, sports fan Mr. Brown drinks several highball cocktails. He arrives at the ballpark to watch the game, but has become so inebriated that the game appears to him in reverse, with the players running the bases backwards and the baseball flying back into the pitcher's hand. After the game is over, Mr. Brown is escorted home by one of his friends. When they arrive at Brown's house, they encounter his wife who becomes furious with the friend and proceeds to physically assault him, believing he is responsible for her husband's severe intoxication.[1]
1907Laughing GasAmericanEdwin Stanton PorterBertha Regustus, Edward Bouldencomedyhttps://en.wikipedia.org/wiki/Laughing_Gas_(film)#1907_FilmThe plot is that of a black woman going to the dentist for a toothache and being given laughing gas. On her way walking home, and in other situations, she can't stop laughing, and everyone she meets \"catches\" the laughter from her, including a vendor and police officers.
1908The Adventures of DollieAmericanD. W. GriffithArthur V. Johnson, Linda Arvidsondramahttps://en.wikipedia.org/wiki/The_Adventures_of_DollieOn a beautiful summer day a father and mother take their daughter Dollie on an outing to the river. The mother refuses to buy a gypsy's wares. The gypsy tries to rob the mother, but the father drives him off. The gypsy returns to the camp and devises a plan. They return and kidnap Dollie while her parents are distracted. A rescue crew is organized, but the gypsy takes Dollie to his camp. They gag Dollie and hide her in a barrel before the rescue party gets to the camp. Once they leave the gypsies and escapes in their wagon. As the wagon crosses the river, the barrel falls into the water. Still sealed in the barrel, Dollie is swept downstream in dangerous currents. A boy who is fishing in the river finds the barrel, and Dollie is reunited safely with her parents.
1908The Black ViperAmericanD. W. GriffithD. W. Griffithdramahttps://en.wikipedia.org/wiki/The_Black_ViperA thug accosts a girl as she leaves her workplace but a man rescues her. The thug vows revenge and, with the help of two friends, attacks the girl and her rescuer again as they're going for a walk. This time they succeed in kidnapping the rescuer. He is bound and gagged and taken away in a cart. The girl runs home and gets help from several neighbors. They track the ruffians down to a cabin in the mountains where the gang has trapped their victim and set the cabin on fire. A thug and Rescuer fight on the roof of the house.
1908A Calamitous ElopementAmericanD.W. GriffithHarry Solter, Linda Arvidsoncomedyhttps://en.wikipedia.org/wiki/A_Calamitous_ElopementA young couple decides to elope after being caught in the midst of a romantic moment by the woman's angry father. They make plans to leave, but a thief discovers their plans and hides in their trunk and waits for the right moment to steal their belongings.
1908The Call of the WildAmericanD. W. GriffithCharles Insleeadventurehttps://en.wikipedia.org/wiki/The_Call_of_the_Wild_(1908_film)A white girl (Florence Lawrence) rejects a proposal from an Indian brave (Charles Inslee) in this early one-reel Western melodrama. Despite the rejection, the Indian still comes to the girl's defense when she is abducted by his warring tribe. In her first year in films, Florence Lawrence was already the most popular among the Biograph Company's anonymous stock company players. By 1909, she was known the world over as \"The Biograph Girl.\"
1908A Christmas CarolAmericanUnknownTom Rickettsdramahttps://en.wikipedia.org/wiki/A_Christmas_Carol_(1908_film)No prints of the first American film adaptation of A Christmas Carol are known to exist,[1] but The Moving Picture World magazine provided a scene-by-scene description before the film's release.[2] Scrooge goes into his office and begins working. His nephew, along with three women who wish for Scrooge to donate enter. However, Scrooge dismisses them. On the night of Christmas Eve, his long-dead partner Jacob Marley comes as a ghost, warning him of a horrible fate if he does not change his ways. Scrooge meets three spirits that show Scrooge the real meaning of Christmas, along with his grave, the result of his parsimonious ways. The next morning, he wakes and realizes the error of his ways. Scrooge was then euphoric and generous for the rest of his life.
1908The Fight for FreedomAmericanD. W. GriffithFlorence Auer, John G. Adolfiwesternhttps://en.wikipedia.org/wiki/The_Fight_for_FreedomThe film opens in a town on the Mexican border. A poker game is going on in the local saloon. One of the players cheats and is shot dead by another of the players, a Mexican named Pedro. In the uproar that follows Pedro is wounded as he escapes from the saloon. The sheriff is called, who tracks Pedro to his home but Pedro kills the sherriff too. While Pedro hides, his wife Juanita, is arrested on suspicion of murdering the sheriff. Pedro rescues her from the town jail and the two head for the Mexican border. Caught by the posse before they reach the border, Juanita is killed and the film ends with Pedro being arrested and taken back to town.
", + "languages": [ + "eng" + ], + "filetype": "text/csv", + "data_source": { + "url": "s3://utic-dev-tech-fixtures/wiki_movie_plots_small.csv", + "version": "4dee8982fe4fa0181e2fe224e6171dcc", + "record_locator": { + "protocol": "s3", + "remote_file_path": "s3://utic-dev-tech-fixtures/" + } + } + } + } +] \ No newline at end of file diff --git a/test_unstructured_ingest/expected-structured-output/s3/2023-Jan-economic-outlook.pdf.json b/test_unstructured_ingest/expected-structured-output/s3/2023-Jan-economic-outlook.json similarity index 56% rename from test_unstructured_ingest/expected-structured-output/s3/2023-Jan-economic-outlook.pdf.json rename to test_unstructured_ingest/expected-structured-output/s3/2023-Jan-economic-outlook.json index 1818ec06fe..3fd47e269a 100644 --- a/test_unstructured_ingest/expected-structured-output/s3/2023-Jan-economic-outlook.pdf.json +++ b/test_unstructured_ingest/expected-structured-output/s3/2023-Jan-economic-outlook.json @@ -1,2396 +1,2510 @@ [ { + "type": "Title", "element_id": "e5318630cd973733087506eca36a6be3", + "text": "INTERNATIONAL MONETARY FUND", "metadata": { - "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 1 - }, - "text": "INTERNATIONAL MONETARY FUND", - "type": "Title" - }, - { - "element_id": "8466f1c7e05ce04838ff95211c4fff50", - "metadata": { + "page_number": 1, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": 
"utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "Title", + "element_id": "8466f1c7e05ce04838ff95211c4fff50", + "text": "WORLD ECONOMIC OUTLOOK UPDATE Inflation Peaking amid Low Growth", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 1 - }, - "text": "WORLD ECONOMIC OUTLOOK UPDATE Inflation Peaking amid Low Growth", - "type": "Title" - }, - { - "element_id": "04fca18cc5aea2fdb24b55c01f4fa968", - "metadata": { + "page_number": 1, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "Title", + "element_id": "04fca18cc5aea2fdb24b55c01f4fa968", + "text": "2023 JAN", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 1 - }, - "text": "2023 JAN", - "type": "Title" - }, - { - "element_id": "e8910cbfc6833d5ca117621c22a183e5", - "metadata": { + "page_number": 1, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - 
"remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "Image", + "element_id": "e8910cbfc6833d5ca117621c22a183e5", + "text": "", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 1 - }, - "text": "", - "type": "Image" - }, - { - "element_id": "d897928ec5f88bb42d3aa4058c040eac", - "metadata": { + "page_number": 1, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "Image", + "element_id": "d897928ec5f88bb42d3aa4058c040eac", + "text": "WORLD ECONOMIC OUTLOOK UPDATE", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 2 - }, - "text": "WORLD ECONOMIC OUTLOOK UPDATE", - "type": "Image" - }, - { - "element_id": "0f02ba386eb35b9043fb86b24f52ddef", - "metadata": { + "page_number": 2, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": 
"utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "Title", + "element_id": "0f02ba386eb35b9043fb86b24f52ddef", + "text": "Inflation Peaking amid Low Growth", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 2 - }, - "text": "Inflation Peaking amid Low Growth", - "type": "Title" - }, - { - "element_id": "dd334d49b207f297d7d16d33537c8a2a", - "metadata": { + "page_number": 2, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "ListItem", + "element_id": "dd334d49b207f297d7d16d33537c8a2a", + "text": "Global growth is projected to fall from an estimated 3.4 percent in 2022 to 2.9 percent in 2023, then rise to 3.1 percent in 2024. The forecast for 2023 is 0.2 percentage point higher than predicted in the October 2022 World Economic Outlook (WEO) but below the historical (2000\u201319) average of 3.8 percent. The rise in central bank rates to fight inflation and Russia\u2019s war in Ukraine continue to weigh on economic activity. 
The rapid spread of COVID-19 in China dampened growth in 2022, but the recent reopening has paved the way for a faster-than-expected recovery. Global inflation is expected to fall from 8.8 percent in 2022 to 6.6 percent in 2023 and 4.3 percent in 2024, still above pre-pandemic (2017\u201319) levels of about 3.5 percent.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 2 - }, - "text": "Global growth is projected to fall from an estimated 3.4 percent in 2022 to 2.9 percent in 2023, then rise to 3.1 percent in 2024. The forecast for 2023 is 0.2 percentage point higher than predicted in the October 2022 World Economic Outlook (WEO) but below the historical (2000–19) average of 3.8 percent. The rise in central bank rates to fight inflation and Russia’s war in Ukraine continue to weigh on economic activity. The rapid spread of COVID-19 in China dampened growth in 2022, but the recent reopening has paved the way for a faster-than-expected recovery. Global inflation is expected to fall from 8.8 percent in 2022 to 6.6 percent in 2023 and 4.3 percent in 2024, still above pre-pandemic (2017–19) levels of about 3.5 percent.", - "type": "ListItem" - }, - { - "element_id": "c61d4c2389bdea541dd0524ae98bbdb2", - "metadata": { + "page_number": 2, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "ListItem", + "element_id": "c61d4c2389bdea541dd0524ae98bbdb2", + 
"text": "The balance of risks remains tilted to the downside, but adverse risks have moderated since the October 2022 WEO. On the upside, a stronger boost from pent-up demand in numerous economies or a faster fall in inflation are plausible. On the downside, severe health outcomes in China could hold back the recovery, Russia\u2019s war in Ukraine could escalate, and tighter global financing conditions could worsen debt distress. Financial markets could also suddenly reprice in response to adverse inflation news, while further geopolitical fragmentation could hamper economic progress.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 2 - }, - "text": "The balance of risks remains tilted to the downside, but adverse risks have moderated since the October 2022 WEO. On the upside, a stronger boost from pent-up demand in numerous economies or a faster fall in inflation are plausible. On the downside, severe health outcomes in China could hold back the recovery, Russia’s war in Ukraine could escalate, and tighter global financing conditions could worsen debt distress. 
Financial markets could also suddenly reprice in response to adverse inflation news, while further geopolitical fragmentation could hamper economic progress.", - "type": "ListItem" - }, - { - "element_id": "6d8185901672f642fb852b8b77c2f244", - "metadata": { + "page_number": 2, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "ListItem", + "element_id": "6d8185901672f642fb852b8b77c2f244", + "text": "tighter monetary conditions and lower growth potentially affecting financial and debt stability, it is necessary to deploy macroprudential tools and strengthen debt restructuring frameworks. Accelerating COVID-19 vaccinations in China would safeguard the recovery, with positive cross-border spillovers. Fiscal support should be better targeted at those most affected by elevated food and energy prices, and broad-based fiscal relief measures should be withdrawn. Stronger multilateral cooperation is essential to preserve the gains from the rules-based multilateral system and to mitigate climate change by limiting emissions and raising green investment.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 2 - }, - "text": "tighter monetary conditions and lower growth potentially affecting financial and debt stability, it is necessary to deploy macroprudential tools and strengthen debt restructuring frameworks. 
Accelerating COVID-19 vaccinations in China would safeguard the recovery, with positive cross-border spillovers. Fiscal support should be better targeted at those most affected by elevated food and energy prices, and broad-based fiscal relief measures should be withdrawn. Stronger multilateral cooperation is essential to preserve the gains from the rules-based multilateral system and to mitigate climate change by limiting emissions and raising green investment.", - "type": "ListItem" - }, - { - "element_id": "b669c3a1a8ac62eede66dc0f3b3a8713", - "metadata": { + "page_number": 2, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "Title", + "element_id": "b669c3a1a8ac62eede66dc0f3b3a8713", + "text": "Forces Shaping the Outlook", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 2 - }, - "text": "Forces Shaping the Outlook", - "type": "Title" - }, - { - "element_id": "973eedf8298b7bd74c78ffd345ef5695", - "metadata": { + "page_number": 2, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": 
"s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "973eedf8298b7bd74c78ffd345ef5695", + "text": "The global fight against inflation, Russia\u2019s war in Ukraine, and a resurgence of COVID-19 in China weighed on global economic activity in 2022, and the first two factors will continue to do so in 2023.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 2 - }, - "text": "The global fight against inflation, Russia’s war in Ukraine, and a resurgence of COVID-19 in China weighed on global economic activity in 2022, and the first two factors will continue to do so in 2023.", - "type": "NarrativeText" - }, - { - "element_id": "f56c5b8e5e9179665c81734bb47ec19b", - "metadata": { + "page_number": 2, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "f56c5b8e5e9179665c81734bb47ec19b", + "text": "Despite these headwinds, real GDP was surprisingly strong in the third quarter of 2022 in numerous economies, including the United States, the euro area, and major emerging market and developing economies. 
The sources of these surprises were in many cases domestic: stronger-than-expected private consumption and investment amid tight labor markets and greater-than-anticipated fiscal support. Households spent more to satisfy pent-up demand, particularly on services, partly by drawing down their stock of savings as economies reopened. Business investment rose to meet demand. On the supply side, easing bottlenecks and declining transportation costs reduced pressures on input prices and allowed for a rebound in previously constrained sectors, such as motor vehicles. Energy markets have adjusted faster than expected to the shock from Russia\u2019s invasion of Ukraine.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 2 - }, - "text": "Despite these headwinds, real GDP was surprisingly strong in the third quarter of 2022 in numerous economies, including the United States, the euro area, and major emerging market and developing economies. The sources of these surprises were in many cases domestic: stronger-than-expected private consumption and investment amid tight labor markets and greater-than-anticipated fiscal support. Households spent more to satisfy pent-up demand, particularly on services, partly by drawing down their stock of savings as economies reopened. Business investment rose to meet demand. On the supply side, easing bottlenecks and declining transportation costs reduced pressures on input prices and allowed for a rebound in previously constrained sectors, such as motor vehicles. 
Energy markets have adjusted faster than expected to the shock from Russia’s invasion of Ukraine.", - "type": "NarrativeText" - }, - { - "element_id": "2c358a4bb06de0c8a9e1209b4a763bfa", - "metadata": { + "page_number": 2, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "2c358a4bb06de0c8a9e1209b4a763bfa", + "text": "In the fourth quarter of 2022, however, this uptick is estimated to have faded in most\u2014though not all\u2013\u2013major economies. US growth remains stronger than expected, with consumers continuing to spend from their stock of savings (the personal saving rate is at its lowest in more than 60 years, except for July 2005), unemployment near historic lows, and plentiful job opportunities. But elsewhere, high-frequency activity indicators (such as business and consumer sentiment, purchasing manager surveys, and mobility indicators) generally point to a slowdown.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 2 - }, - "text": "In the fourth quarter of 2022, however, this uptick is estimated to have faded in most—though not all––major economies. US growth remains stronger than expected, with consumers continuing to spend from their stock of savings (the personal saving rate is at its lowest in more than 60 years, except for July 2005), unemployment near historic lows, and plentiful job opportunities. 
But elsewhere, high-frequency activity indicators (such as business and consumer sentiment, purchasing manager surveys, and mobility indicators) generally point to a slowdown.", - "type": "NarrativeText" - }, - { - "element_id": "102714c79eee9e26b226b46f8289cb96", - "metadata": { + "page_number": 2, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "ListItem", + "element_id": "102714c79eee9e26b226b46f8289cb96", + "text": "International Monetary Fund | January 2023 1", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 2 - }, - "text": "International Monetary Fund | January 2023 1", - "type": "ListItem" - }, - { - "element_id": "04eab9b077f4803c2a87e4cb81235a0e", - "metadata": { + "page_number": 2, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "Header", + "element_id": 
"04eab9b077f4803c2a87e4cb81235a0e", + "text": "WORLD ECONOMIC OUTLOOK UPDATE, JANUARY 2023", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 3 - }, - "text": "WORLD ECONOMIC OUTLOOK UPDATE, JANUARY 2023", - "type": "Header" - }, - { - "element_id": "d8af302f7df089e4e2068a1dccf1903f", - "metadata": { + "page_number": 3, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "d8af302f7df089e4e2068a1dccf1903f", + "text": "COVID-19 deepens China\u2019s slowdown. Economic activity in China slowed in the fourth quarter amid multiple large COVID-19 outbreaks in Beijing and other densely populated localities. Renewed lockdowns accompanied the outbreaks until the relaxation of COVID-19 restrictions in November and December, which paved the way for a full reopening. Real estate investment continued to contract, and developer restructuring is proceeding slowly, amid the lingering property market crisis. Developers have yet to deliver on a large backlog of presold housing, and downward pressure is building on house prices (so far limited by home price floors). The authorities have responded with additional monetary and fiscal policy easing, new vaccination targets for the elderly, and steps to support the completion of unfinished real estate projects. However, consumer and business sentiment remained subdued in late 2022. 
China\u2019s slowdown has reduced global trade growth and international commodity prices.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 3 - }, - "text": "COVID-19 deepens China’s slowdown. Economic activity in China slowed in the fourth quarter amid multiple large COVID-19 outbreaks in Beijing and other densely populated localities. Renewed lockdowns accompanied the outbreaks until the relaxation of COVID-19 restrictions in November and December, which paved the way for a full reopening. Real estate investment continued to contract, and developer restructuring is proceeding slowly, amid the lingering property market crisis. Developers have yet to deliver on a large backlog of presold housing, and downward pressure is building on house prices (so far limited by home price floors). The authorities have responded with additional monetary and fiscal policy easing, new vaccination targets for the elderly, and steps to support the completion of unfinished real estate projects. However, consumer and business sentiment remained subdued in late 2022. 
China’s slowdown has reduced global trade growth and international commodity prices.", - "type": "NarrativeText" - }, - { - "element_id": "2951e1eb514a453e5813bc6889f759f5", - "metadata": { + "page_number": 3, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "2951e1eb514a453e5813bc6889f759f5", + "text": "Monetary policy starts to bite. Signs are apparent that monetary policy tightening is starting to cool demand and inflation, but the full impact is unlikely to be realized before 2024. Global headline inflation appears to have peaked in the third quarter of 2022 (Figure 1). Prices of fuel and nonfuel commodities have declined, lowering headline inflation, notably in the United States, the euro area, and Latin America. But underlying (core) inflation has not yet peaked in most economies and remains well above pre-pandemic levels. It has persisted amid second-round effects from earlier cost shocks and tight labor markets with robust wage growth as consumer demand has remained resilient. Medium-term inflation expectations generally remain anchored, but some gauges are up. These developments have caused central banks to raise rates faster than expected, especially in the United States and the euro area, and to signal that rates will stay elevated for longer. 
Core inflation is declining in some economies that have completed their tightening cycle\u2014such as Brazil. Financial markets are displaying high sensitivity to inflation news, with equity markets rising following recent releases of lower inflation data in anticipation of interest rate cuts (Box 1), despite central banks\u2019 communicating their resolve to tighten policy further. With the peak in US headline inflation and an acceleration in rate hikes by several non-US central banks, the dollar has weakened since September but remains significantly stronger than a year ago.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 3 - }, - "text": "Monetary policy starts to bite. Signs are apparent that monetary policy tightening is starting to cool demand and inflation, but the full impact is unlikely to be realized before 2024. Global headline inflation appears to have peaked in the third quarter of 2022 (Figure 1). Prices of fuel and nonfuel commodities have declined, lowering headline inflation, notably in the United States, the euro area, and Latin America. But underlying (core) inflation has not yet peaked in most economies and remains well above pre-pandemic levels. It has persisted amid second-round effects from earlier cost shocks and tight labor markets with robust wage growth as consumer demand has remained resilient. Medium-term inflation expectations generally remain anchored, but some gauges are up. These developments have caused central banks to raise rates faster than expected, especially in the United States and the euro area, and to signal that rates will stay elevated for longer. Core inflation is declining in some economies that have completed their tightening cycle—such as Brazil. 
Financial markets are displaying high sensitivity to inflation news, with equity markets rising following recent releases of lower inflation data in anticipation of interest rate cuts (Box 1), despite central banks’ communicating their resolve to tighten policy further. With the peak in US headline inflation and an acceleration in rate hikes by several non-US central banks, the dollar has weakened since September but remains significantly stronger than a year ago.", - "type": "NarrativeText" - }, - { - "element_id": "ea9e70213dbb306bbfc411301593a01f", - "metadata": { + "page_number": 3, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "Title", + "element_id": "ea9e70213dbb306bbfc411301593a01f", + "text": "Median country Brazil", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 3 - }, - "text": "Median country Brazil", - "type": "Title" - }, - { - "element_id": "a37a878930b52526b96231dcbcb9b3f4", - "metadata": { + "page_number": 3, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": 
"s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "Title", + "element_id": "a37a878930b52526b96231dcbcb9b3f4", + "text": "United States", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 3 - }, - "text": "United States", - "type": "Title" - }, - { - "element_id": "cdd95d6fc1603d1c87d82ef501854019", - "metadata": { + "page_number": 3, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "Title", + "element_id": "cdd95d6fc1603d1c87d82ef501854019", + "text": "Euro area", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 3 - }, - "text": "Euro area", - "type": "Title" - }, - { - "element_id": "f37bd3b81db4c6ef56f199b5d899aebd", - "metadata": { + "page_number": 3, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - 
"version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "f37bd3b81db4c6ef56f199b5d899aebd", + "text": "Winter comes to Europe. European economic growth in 2022 was more resilient than expected in the face of the large negative terms-of-trade shock from the war in Ukraine. This resilience\u2013\u2013which is", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 3 - }, - "text": "Winter comes to Europe. European economic growth in 2022 was more resilient than expected in the face of the large negative terms-of-trade shock from the war in Ukraine. This resilience––which is", - "type": "NarrativeText" - }, - { - "element_id": "23cc5e8cc806e807c2d2a5070bd07b1c", - "metadata": { + "page_number": 3, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "ListItem", + "element_id": "23cc5e8cc806e807c2d2a5070bd07b1c", + "text": "2 International Monetary Fund | January 2023", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 3 - }, - "text": "2 International Monetary Fund | January 2023", - "type": "ListItem" - }, - { - "element_id": "43f5a5eb2707a8a8f8d7fd26a78f9dca", - "metadata": { + "page_number": 3, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": 
"s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "UncategorizedText", + "element_id": "43f5a5eb2707a8a8f8d7fd26a78f9dca", + "text": "Nov. \u00ab22", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 3 - }, - "text": "Nov. «22", - "type": "UncategorizedText" - }, - { - "element_id": "010f24c0d9604698a0a97e91efcf2ae6", - "metadata": { + "page_number": 3, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "Title", + "element_id": "010f24c0d9604698a0a97e91efcf2ae6", + "text": "Nov.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 3 - }, - "text": "Nov.", - "type": "Title" - }, - { - "element_id": "d15294a3501442f95dd92dfae7cdf59f", - "metadata": { + "page_number": 3, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + 
"version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "Header", + "element_id": "d15294a3501442f95dd92dfae7cdf59f", + "text": "WORLD ECONOMIC OUTLOOK UPDATE, JANUARY 2023", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 4 - }, - "text": "WORLD ECONOMIC OUTLOOK UPDATE, JANUARY 2023", - "type": "Header" - }, - { - "element_id": "cb9a40fe73a2ad2fff6f2dfc3504b674", - "metadata": { + "page_number": 4, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "cb9a40fe73a2ad2fff6f2dfc3504b674", + "text": "visible in consumption and investment data for the third quarter\u2013\u2013partly reflects government support of about 1.2 percent of European Union GDP (net budgetary cost) to households and firms hit by the energy crisis, as well as dynamism from economies reopening. 
Gas prices have declined by more than expected amid higher non-Russian pipeline and liquefied natural gas flows, compression of demand for gas, and a warmer-than-usual winter. However, the boost from reopening appears to be fading. High-frequency indicators for the fourth quarter suggest that the manufacturing and services sectors are contracting. Consumer confidence and business sentiment have worsened. With inflation at about 10 percent or above in several euro area countries and the United Kingdom, household budgets remain stretched. The accelerated pace of rate increases by the Bank of England and the European Central Bank is tightening financial conditions and cooling demand in the housing sector and beyond.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 4 - }, - "text": "visible in consumption and investment data for the third quarter––partly reflects government support of about 1.2 percent of European Union GDP (net budgetary cost) to households and firms hit by the energy crisis, as well as dynamism from economies reopening. Gas prices have declined by more than expected amid higher non-Russian pipeline and liquefied natural gas flows, compression of demand for gas, and a warmer-than-usual winter. However, the boost from reopening appears to be fading. High-frequency indicators for the fourth quarter suggest that the manufacturing and services sectors are contracting. Consumer confidence and business sentiment have worsened. With inflation at about 10 percent or above in several euro area countries and the United Kingdom, household budgets remain stretched. 
The accelerated pace of rate increases by the Bank of England and the European Central Bank is tightening financial conditions and cooling demand in the housing sector and beyond.", - "type": "NarrativeText" - }, - { - "element_id": "ae89dcff717032e7fe67736c047811cf", - "metadata": { + "page_number": 4, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "Title", + "element_id": "ae89dcff717032e7fe67736c047811cf", + "text": "The Forecast", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 4 - }, - "text": "The Forecast", - "type": "Title" - }, - { - "element_id": "31faf5d0d42f072f67cd5c14d03d1eb1", - "metadata": { + "page_number": 4, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "Title", + "element_id": "31faf5d0d42f072f67cd5c14d03d1eb1", + "text": "Growth Bottoming Out", + 
"metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 4 - }, - "text": "Growth Bottoming Out", - "type": "Title" - }, - { - "element_id": "7648ab0cbe2c234e76e7509b4bafa67c", - "metadata": { + "page_number": 4, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "7648ab0cbe2c234e76e7509b4bafa67c", + "text": "Global growth, estimated at 3.4 percent in 2022, is projected to fall to 2.9 percent in 2023 before rising to 3.1 percent in 2024 (Table 1). Compared with the October forecast, the estimate for 2022 and the forecast for 2023 are both higher by about 0.2 percentage point, reflecting positive surprises and greater-than-expected resilience in numerous economies. Negative growth in global GDP or global GDP per capita\u2014which often happens when there is a global recession\u2014is not expected. Nevertheless, global growth projected for 2023 and 2024 is below the historical (2000\u201319) annual average of 3.8 percent.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 4 - }, - "text": "Global growth, estimated at 3.4 percent in 2022, is projected to fall to 2.9 percent in 2023 before rising to 3.1 percent in 2024 (Table 1). 
Compared with the October forecast, the estimate for 2022 and the forecast for 2023 are both higher by about 0.2 percentage point, reflecting positive surprises and greater-than-expected resilience in numerous economies. Negative growth in global GDP or global GDP per capita—which often happens when there is a global recession—is not expected. Nevertheless, global growth projected for 2023 and 2024 is below the historical (2000–19) annual average of 3.8 percent.", - "type": "NarrativeText" - }, - { - "element_id": "d3f06c04ff50e84b159d45dc0eb0469e", - "metadata": { + "page_number": 4, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "d3f06c04ff50e84b159d45dc0eb0469e", + "text": "The forecast of low growth in 2023 reflects the rise in central bank rates to fight inflation\u2013\u2013 especially in advanced economies\u2013\u2013as well as the war in Ukraine. The decline in growth in 2023 from 2022 is driven by advanced economies; in emerging market and developing economies, growth is estimated to have bottomed out in 2022. Growth is expected to pick up in China with the full reopening in 2023. The expected pickup in 2024 in both groups of economies reflects gradual recovery from the effects of the war in Ukraine and subsiding inflation. 
Following the path of global demand, world trade growth is expected to decline in 2023 to 2.4 percent, despite an easing of supply bottlenecks, before rising to 3.4 percent in 2024.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 4 - }, - "text": "The forecast of low growth in 2023 reflects the rise in central bank rates to fight inflation–– especially in advanced economies––as well as the war in Ukraine. The decline in growth in 2023 from 2022 is driven by advanced economies; in emerging market and developing economies, growth is estimated to have bottomed out in 2022. Growth is expected to pick up in China with the full reopening in 2023. The expected pickup in 2024 in both groups of economies reflects gradual recovery from the effects of the war in Ukraine and subsiding inflation. Following the path of global demand, world trade growth is expected to decline in 2023 to 2.4 percent, despite an easing of supply bottlenecks, before rising to 3.4 percent in 2024.", - "type": "NarrativeText" - }, - { - "element_id": "4c85c18daf7765010c95f9d239d6b773", - "metadata": { + "page_number": 4, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "4c85c18daf7765010c95f9d239d6b773", + "text": "These forecasts are based on a number of assumptions, including on fuel and nonfuel commodity prices, which have generally been revised 
down since October, and on interest rates, which have been revised up. In 2023, oil prices are projected to fall by about 16 percent, while nonfuel commodity prices are expected to fall by, on average, 6.3 percent. Global interest rate assumptions are revised up, reflecting intensified actual and signaled policy tightening by major central banks since October.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 4 - }, - "text": "These forecasts are based on a number of assumptions, including on fuel and nonfuel commodity prices, which have generally been revised down since October, and on interest rates, which have been revised up. In 2023, oil prices are projected to fall by about 16 percent, while nonfuel commodity prices are expected to fall by, on average, 6.3 percent. Global interest rate assumptions are revised up, reflecting intensified actual and signaled policy tightening by major central banks since October.", - "type": "NarrativeText" - }, - { - "element_id": "63a58ed0a369adeea609b73bde440690", - "metadata": { + "page_number": 4, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "63a58ed0a369adeea609b73bde440690", + "text": "For advanced economies, growth is projected to decline sharply from 2.7 percent in 2022 to 1.2 percent in 2023 before rising to 1.4 percent in 2024, with a downward revision of 0.2 
percentage point for 2024. About 90 percent of advanced economies are projected to see a decline in growth in 2023.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 4 - }, - "text": "For advanced economies, growth is projected to decline sharply from 2.7 percent in 2022 to 1.2 percent in 2023 before rising to 1.4 percent in 2024, with a downward revision of 0.2 percentage point for 2024. About 90 percent of advanced economies are projected to see a decline in growth in 2023.", - "type": "NarrativeText" - }, - { - "element_id": "cf86df4360039e44a9d36c2156253dca", - "metadata": { + "page_number": 4, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "ListItem", + "element_id": "cf86df4360039e44a9d36c2156253dca", + "text": "In the United States, growth is projected to fall from 2.0 percent in 2022 to 1.4 percent in 2023 and 1.0 percent in 2024. With growth rebounding in the second half of 2024, growth in 2024 will be faster than in 2023 on a fourth-quarter-over-fourth-quarter basis, as in most advanced", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 4 - }, - "text": "In the United States, growth is projected to fall from 2.0 percent in 2022 to 1.4 percent in 2023 and 1.0 percent in 2024. 
With growth rebounding in the second half of 2024, growth in 2024 will be faster than in 2023 on a fourth-quarter-over-fourth-quarter basis, as in most advanced", - "type": "ListItem" - }, - { - "element_id": "650a84a42d9c9f9a31dda76b1a9413b1", - "metadata": { + "page_number": 4, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "ListItem", + "element_id": "650a84a42d9c9f9a31dda76b1a9413b1", + "text": "International Monetary Fund | January 2023 3", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 4 - }, - "text": "International Monetary Fund | January 2023 3", - "type": "ListItem" - }, - { - "element_id": "f57040df3261584cac2da28eac005e0e", - "metadata": { + "page_number": 4, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "Title", + "element_id": 
"f57040df3261584cac2da28eac005e0e", + "text": "WORLD ECONOMIC OUTLOOK UPDATE, JANUARY 2023", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 5 - }, - "text": "WORLD ECONOMIC OUTLOOK UPDATE, JANUARY 2023", - "type": "Title" - }, - { - "element_id": "966cfe6522040aae1b567b217ecf2ec2", - "metadata": { + "page_number": 5, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "966cfe6522040aae1b567b217ecf2ec2", + "text": "economies. There is a 0.4 percentage point upward revision for annual growth in 2023, reflecting carryover effects from domestic demand resilience in 2022, but a 0.2 percentage point downward revision of growth in 2024 due to the steeper path of Federal Reserve rate hikes, to a peak of about 5.1 percent in 2023.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 5 - }, - "text": "economies. 
There is a 0.4 percentage point upward revision for annual growth in 2023, reflecting carryover effects from domestic demand resilience in 2022, but a 0.2 percentage point downward revision of growth in 2024 due to the steeper path of Federal Reserve rate hikes, to a peak of about 5.1 percent in 2023.", - "type": "NarrativeText" - }, - { - "element_id": "af70a583660627245d8a985c424ed5ce", - "metadata": { + "page_number": 5, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "ListItem", + "element_id": "af70a583660627245d8a985c424ed5ce", + "text": "percent in 2024. The 0.2 percentage point upward revision to the forecast for 2023 reflects the effects of faster rate hikes by the European Central Bank and eroding real incomes, offset by the carryover from the 2022 outturn, lower wholesale energy prices, and additional announcements of fiscal purchasing power support in the form of energy price controls and cash transfers.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 5 - }, - "text": "percent in 2024. 
The 0.2 percentage point upward revision to the forecast for 2023 reflects the effects of faster rate hikes by the European Central Bank and eroding real incomes, offset by the carryover from the 2022 outturn, lower wholesale energy prices, and additional announcements of fiscal purchasing power support in the form of energy price controls and cash transfers.", - "type": "ListItem" - }, - { - "element_id": "31289f8161557502f84829c118ef779f", - "metadata": { + "page_number": 5, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "ListItem", + "element_id": "31289f8161557502f84829c118ef779f", + "text": "Growth in the United Kingdom is projected to be \u20130.6 percent in 2023, a 0.9 percentage point downward revision from October, reflecting tighter fiscal and monetary policies and financial conditions and still-high energy retail prices weighing on household budgets.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 5 - }, - "text": "Growth in the United Kingdom is projected to be –0.6 percent in 2023, a 0.9 percentage point downward revision from October, reflecting tighter fiscal and monetary policies and financial conditions and still-high energy retail prices weighing on household budgets.", - "type": "ListItem" - }, - { - "element_id": "7b242789f47f74e8004439ab9c9885ba", - "metadata": { + "page_number": 5, "data_source": { - "date_modified": 
"2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "ListItem", + "element_id": "7b242789f47f74e8004439ab9c9885ba", + "text": "Growth in Japan is projected to rise to 1.8 percent in 2023, with continued monetary and fiscal policy support. High corporate profits from a depreciated yen and earlier delays in implementing previous projects will support business investment. In 2024, growth is expected to decline to 0.9 percent as the effects of past stimulus dissipate.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 5 - }, - "text": "Growth in Japan is projected to rise to 1.8 percent in 2023, with continued monetary and fiscal policy support. High corporate profits from a depreciated yen and earlier delays in implementing previous projects will support business investment. 
In 2024, growth is expected to decline to 0.9 percent as the effects of past stimulus dissipate.", - "type": "ListItem" - }, - { - "element_id": "7e8c93b4741fa4ff1a87881dcaa89686", - "metadata": { + "page_number": 5, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "7e8c93b4741fa4ff1a87881dcaa89686", + "text": "For emerging market and developing economies, growth is projected to rise modestly, from 3.9 percent in 2022 to 4.0 percent in 2023 and 4.2 percent in 2024, with an upward revision of 0.3 percentage point for 2023 and a downward revision of 0.1 percentage point for 2024. About half of emerging market and developing economies have lower growth in 2023 than in 2022.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 5 - }, - "text": "For emerging market and developing economies, growth is projected to rise modestly, from 3.9 percent in 2022 to 4.0 percent in 2023 and 4.2 percent in 2024, with an upward revision of 0.3 percentage point for 2023 and a downward revision of 0.1 percentage point for 2024. 
About half of emerging market and developing economies have lower growth in 2023 than in 2022.", - "type": "NarrativeText" - }, - { - "element_id": "c1e048bf64acdbfb4b3f07ae2c2bd9a0", - "metadata": { + "page_number": 5, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "ListItem", + "element_id": "c1e048bf64acdbfb4b3f07ae2c2bd9a0", + "text": "percent, respectively, after the deeper-than-expected slowdown in 2022 to 4.3 percent attributable to China\u2019s economy. China\u2019s real GDP slowdown in the fourth quarter of 2022 implies a 0.2 percentage point downgrade for 2022 growth to 3.0 percent\u2014the first time in more than 40 years with China\u2019s growth below the global average. Growth in China is projected to rise to 5.2 percent in 2023, reflecting rapidly improving mobility, and to fall to 4.5 percent in 2024 before settling at below 4 percent over the medium term amid declining business dynamism and slow progress on structural reforms. Growth in India is set to decline from 6.8 percent in 2022 to 6.1 percent in 2023 before picking up to 6.8 percent in 2024, with resilient domestic demand despite external headwinds. 
Growth in the ASEAN-5 countries (Indonesia, Malaysia, Philippines, Singapore, Thailand) is similarly projected to slow to 4.3 percent in 2023 and then pick up to 4.7 percent in 2024.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 5 - }, - "text": "percent, respectively, after the deeper-than-expected slowdown in 2022 to 4.3 percent attributable to China’s economy. China’s real GDP slowdown in the fourth quarter of 2022 implies a 0.2 percentage point downgrade for 2022 growth to 3.0 percent—the first time in more than 40 years with China’s growth below the global average. Growth in China is projected to rise to 5.2 percent in 2023, reflecting rapidly improving mobility, and to fall to 4.5 percent in 2024 before settling at below 4 percent over the medium term amid declining business dynamism and slow progress on structural reforms. Growth in India is set to decline from 6.8 percent in 2022 to 6.1 percent in 2023 before picking up to 6.8 percent in 2024, with resilient domestic demand despite external headwinds. 
Growth in the ASEAN-5 countries (Indonesia, Malaysia, Philippines, Singapore, Thailand) is similarly projected to slow to 4.3 percent in 2023 and then pick up to 4.7 percent in 2024.", - "type": "ListItem" - }, - { - "element_id": "9da9aade2e55fc72f03d8b5a78092503", - "metadata": { + "page_number": 5, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "9da9aade2e55fc72f03d8b5a78092503", + "text": "Growth in emerging and developing Europe is projected to have bottomed out in 2022 at 0.7 percent and, since the October forecast, has been revised up for 2023 by 0.9 percentage point to 1.5 percent. This reflects a smaller economic contraction in Russia in 2022 (estimated at \u20132.2 percent compared with a predicted \u20133.4 percent) followed by modestly positive growth in 2023. At the current oil price cap level of the Group of Seven, Russian crude oil export volumes are not expected to be significantly affected, with Russian trade continuing to be redirected from sanctioning to non-sanctioning countries. In Latin America and the Caribbean, growth is projected to decline from 3.9 percent in 2022 to 1.8 percent in 2023, with an upward revision for 2023 of 0.1 percentage point since October. 
The forecast revision reflects upgrades of 0.2 percentage point for Brazil and 0.5 percentage point for Mexico due to unexpected domestic demand resilience, higher-than-expected growth in", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 5 - }, - "text": "Growth in emerging and developing Europe is projected to have bottomed out in 2022 at 0.7 percent and, since the October forecast, has been revised up for 2023 by 0.9 percentage point to 1.5 percent. This reflects a smaller economic contraction in Russia in 2022 (estimated at –2.2 percent compared with a predicted –3.4 percent) followed by modestly positive growth in 2023. At the current oil price cap level of the Group of Seven, Russian crude oil export volumes are not expected to be significantly affected, with Russian trade continuing to be redirected from sanctioning to non-sanctioning countries. In Latin America and the Caribbean, growth is projected to decline from 3.9 percent in 2022 to 1.8 percent in 2023, with an upward revision for 2023 of 0.1 percentage point since October. 
The forecast revision reflects upgrades of 0.2 percentage point for Brazil and 0.5 percentage point for Mexico due to unexpected domestic demand resilience, higher-than-expected growth in", - "type": "NarrativeText" - }, - { - "element_id": "e2f761e5fbfa887c5c4654959178dd0e", - "metadata": { + "page_number": 5, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "ListItem", + "element_id": "e2f761e5fbfa887c5c4654959178dd0e", + "text": "4 International Monetary Fund | January 2023", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 5 - }, - "text": "4 International Monetary Fund | January 2023", - "type": "ListItem" - }, - { - "element_id": "90d34f5e846e7bc2bf02fac74e7081a7", - "metadata": { + "page_number": 5, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "Title", + 
"element_id": "90d34f5e846e7bc2bf02fac74e7081a7", + "text": "WORLD ECONOMIC OUTLOOK UPDATE, JANUARY 2023", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 6 - }, - "text": "WORLD ECONOMIC OUTLOOK UPDATE, JANUARY 2023", - "type": "Title" - }, - { - "element_id": "ffb99a5d75b910c8329e0ef1bafde120", - "metadata": { + "page_number": 6, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "ffb99a5d75b910c8329e0ef1bafde120", + "text": "major trading partner economies, and in Brazil, greater-than-expected fiscal support. Growth in the region is projected to rise to 2.1 percent in 2024, although with a downward revision of 0.3 percentage point, reflecting tighter financial conditions, lower prices of exported commodities, and downward revisions to trading partner growth.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 6 - }, - "text": "major trading partner economies, and in Brazil, greater-than-expected fiscal support. 
Growth in the region is projected to rise to 2.1 percent in 2024, although with a downward revision of 0.3 percentage point, reflecting tighter financial conditions, lower prices of exported commodities, and downward revisions to trading partner growth.", - "type": "NarrativeText" - }, - { - "element_id": "d75546413a70b32318e60a4439de8e7f", - "metadata": { + "page_number": 6, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 6 - }, - "text": "Growth in the Middle East and Central Asia is projected to decline from 5.3 percent in 2022 to 3.2 percent in 2023, with a downward revision of 0.4 percentage point since October, mainly attributable to a steeper-than-expected growth slowdown in Saudi Arabia, from 8.7 percent in 2022 (which was stronger than expected by 1.1 percentage points) to 2.6 percent in 2023, with a negative revision of 1.1 percentage points. The downgrade for 2023 reflects mainly lower oil production in line with an agreement through OPEC+ (Organization of the Petroleum Exporting Countries, including Russia and other non-OPEC oil exporters), while non-oil growth is expected to remain robust. In sub-Saharan Africa, growth is projected to remain moderate at 3.8 percent in 2023 amid prolonged fallout from the COVID-19 pandemic, although with a modest upward revision since October, before picking up to 4.1 percent in 2024. 
The small upward revision for 2023 (0.1 percentage point) reflects Nigeria’s rising growth in 2023 due to measures to address insecurity issues in the oil sector. In South Africa, by contrast, after a COVID-19 reopening rebound in 2022, projected growth more than halves in 2023, to 1.2 percent, reflecting weaker external demand, power shortages, and structural constraints.", - "type": "NarrativeText" + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } }, { - "element_id": "c4ed38259052e804c28ab9511fb83709", + "type": "NarrativeText", + "element_id": "d75546413a70b32318e60a4439de8e7f", + "text": "Growth in the Middle East and Central Asia is projected to decline from 5.3 percent in 2022 to 3.2 percent in 2023, with a downward revision of 0.4 percentage point since October, mainly attributable to a steeper-than-expected growth slowdown in Saudi Arabia, from 8.7 percent in 2022 (which was stronger than expected by 1.1 percentage points) to 2.6 percent in 2023, with a negative revision of 1.1 percentage points. The downgrade for 2023 reflects mainly lower oil production in line with an agreement through OPEC+ (Organization of the Petroleum Exporting Countries, including Russia and other non-OPEC oil exporters), while non-oil growth is expected to remain robust. In sub-Saharan Africa, growth is projected to remain moderate at 3.8 percent in 2023 amid prolonged fallout from the COVID-19 pandemic, although with a modest upward revision since October, before picking up to 4.1 percent in 2024. The small upward revision for 2023 (0.1 percentage point) reflects Nigeria\u2019s rising growth in 2023 due to measures to address insecurity issues in the oil sector. 
In South Africa, by contrast, after a COVID-19 reopening rebound in 2022, projected growth more than halves in 2023, to 1.2 percent, reflecting weaker external demand, power shortages, and structural constraints.", "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 6, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "Title", + "element_id": "c4ed38259052e804c28ab9511fb83709", + "text": "Inflation Peaking", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 6 - }, - "text": "Inflation Peaking", - "type": "Title" - }, - { - "element_id": "6fad039abd2a07c18e04427a5c0934c1", - "metadata": { + "page_number": 6, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "6fad039abd2a07c18e04427a5c0934c1", + "text": 
"About 84 percent of countries are expected to have lower headline (consumer price index) inflation in 2023 than in 2022. Global inflation is set to fall from 8.8 percent in 2022 (annual average) to 6.6 percent in 2023 and 4.3 percent in 2024\u2013\u2013above pre-pandemic (2017\u201319) levels of about 3.5 percent. The projected disinflation partly reflects declining international fuel and nonfuel commodity prices due to weaker global demand. It also reflects the cooling effects of monetary policy tightening on underlying (core) inflation, which globally is expected to decline from 6.9 percent in the fourth quarter of 2022 (year over year) to 4.5 percent by the fourth quarter of 2023. Still, disinflation will take time: by 2024, projected annual average headline and core inflation will, respectively, still be above pre-pandemic levels in 82 percent and 86 percent of economies.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 6 - }, - "text": "About 84 percent of countries are expected to have lower headline (consumer price index) inflation in 2023 than in 2022. Global inflation is set to fall from 8.8 percent in 2022 (annual average) to 6.6 percent in 2023 and 4.3 percent in 2024––above pre-pandemic (2017–19) levels of about 3.5 percent. The projected disinflation partly reflects declining international fuel and nonfuel commodity prices due to weaker global demand. It also reflects the cooling effects of monetary policy tightening on underlying (core) inflation, which globally is expected to decline from 6.9 percent in the fourth quarter of 2022 (year over year) to 4.5 percent by the fourth quarter of 2023. 
Still, disinflation will take time: by 2024, projected annual average headline and core inflation will, respectively, still be above pre-pandemic levels in 82 percent and 86 percent of economies.", - "type": "NarrativeText" - }, - { - "element_id": "276ca71db7194279383dbd5ed47e6401", - "metadata": { + "page_number": 6, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "276ca71db7194279383dbd5ed47e6401", + "text": "In advanced economies, annual average inflation is projected to decline from 7.3 percent in 2022 to 4.6 percent in 2023 and 2.6 percent in 2024\u2013\u2013above target in several cases. In emerging market and developing economies, projected annual inflation declines from 9.9 percent in 2022 to 8.1 percent in 2023 and 5.5 percent in 2024, above the 4.9 percent pre-pandemic (2017\u201319) average. In low-income developing countries, inflation is projected to moderate from 14.2 percent in 2022 to 8.6 percent in 2024\u2013\u2013still high, but close to the pre-pandemic average.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 6 - }, - "text": "In advanced economies, annual average inflation is projected to decline from 7.3 percent in 2022 to 4.6 percent in 2023 and 2.6 percent in 2024––above target in several cases. 
In emerging market and developing economies, projected annual inflation declines from 9.9 percent in 2022 to 8.1 percent in 2023 and 5.5 percent in 2024, above the 4.9 percent pre-pandemic (2017–19) average. In low-income developing countries, inflation is projected to moderate from 14.2 percent in 2022 to 8.6 percent in 2024––still high, but close to the pre-pandemic average.", - "type": "NarrativeText" - }, - { - "element_id": "dad6fdc16c791d4a298b86a74a7787cb", - "metadata": { + "page_number": 6, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "dad6fdc16c791d4a298b86a74a7787cb", + "text": "The balance of risks to the global outlook remains tilted to the downside, with scope for lower growth and higher inflation, but adverse risks have moderated since the October 2022 World Economic Outlook.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 6 - }, - "text": "The balance of risks to the global outlook remains tilted to the downside, with scope for lower growth and higher inflation, but adverse risks have moderated since the October 2022 World Economic Outlook.", - "type": "NarrativeText" - }, - { - "element_id": "4a9791d1daa8c2de8ae6fe9473b0806c", - "metadata": { + "page_number": 6, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": 
"s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "ListItem", + "element_id": "4a9791d1daa8c2de8ae6fe9473b0806c", + "text": "International Monetary Fund | January 2023. 5", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 6 - }, - "text": "International Monetary Fund | January 2023. 5", - "type": "ListItem" - }, - { - "element_id": "5ed317e88b9e032f0a07e1a59492920c", - "metadata": { + "page_number": 6, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "Title", + "element_id": "5ed317e88b9e032f0a07e1a59492920c", + "text": "WORLD ECONOMIC OUTLOOK UPDATE, JANUARY 2023", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 7 - }, - "text": "WORLD ECONOMIC OUTLOOK UPDATE, JANUARY 2023", - "type": "Title" - }, - { - "element_id": "7b55f9cf2ab15edd5f5ddda66cd012d1", - "metadata": { + "page_number": 7, "data_source": { - 
"date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "Title", + "element_id": "7b55f9cf2ab15edd5f5ddda66cd012d1", + "text": "Table 1. Overview of the World Economic Outlook Projections (Percent change, unless noted otherwise)", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 7 - }, - "text": "Table 1. Overview of the World Economic Outlook Projections (Percent change, unless noted otherwise)", - "type": "Title" - }, - { - "element_id": "4eb34e94205ace0b2308f955a58a3f0a", - "metadata": { + "page_number": 7, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "Table", + "element_id": "4eb34e94205ace0b2308f955a58a3f0a", + "text": "Year over Year Difference from October 2022 Q4 over Q4 2/ 2021 Estimate 2022 Projections 2023 2024 WEO Projections 1/ 2023 2024 Estimate 2022 Projections 2023 2024 6.2 3.4 2.9 
3.1 0.2 \u20130.1 1.9 3.2 3.0 Advanced Economies United States Euro Area Germany France Italy Spain Japan United Kingdom Canada Other Advanced Economies 3/ 5.4 5.9 5.3 2.6 6.8 6.7 5.5 2.1 7.6 5.0 5.3 2.7 2.0 3.5 1.9 2.6 3.9 5.2 1.4 4.1 3.5 2.8 1.2 1.4 0.7 0.1 0.7 0.6 1.1 1.8 \u20130.6 1.5 2.0 1.4 1.0 1.6 1.4 1.6 0.9 2.4 0.9 0.9 1.5 2.4 0.1 0.4 0.2 0.4 0.0 0.8 \u20130.1 0.2 \u20130.9 0.0 \u20130.3 \u20130.2 \u20130.2 \u20130.2 \u20130.1 0.0 \u20130.4 \u20130.2 \u20130.4 0.3 \u20130.1 \u20130.2 1.3 0.7 1.9 1.4 0.5 2.1 2.1 1.7 0.4 2.3 1.4 1.1 1.0 0.5 0.0 0.9 0.1 1.3 1.0 \u20130.5 1.2 2.1 1.6 1.3 2.1 2.3 1.8 1.0 2.8 1.0 1.8 1.9 2.2 Emerging Market and Developing Economies Emerging and Developing Asia China India 4/ Emerging and Developing Europe Russia Latin America and the Caribbean Brazil Mexico Middle East and Central Asia Saudi Arabia Sub-Saharan Africa Nigeria South Africa 6.7 7.4 8.4 8.7 6.9 4.7 7.0 5.0 4.7 4.5 3.2 4.7 3.6 4.9 3.9 4.3 3.0 6.8 0.7 \u20132.2 3.9 3.1 3.1 5.3 8.7 3.8 3.0 2.6 4.0 5.3 5.2 6.1 1.5 0.3 1.8 1.2 1.7 3.2 2.6 3.8 3.2 1.2 4.2 5.2 4.5 6.8 2.6 2.1 2.1 1.5 1.6 3.7 3.4 4.1 2.9 1.3 0.3 0.4 0.8 0.0 0.9 2.6 0.1 0.2 0.5 \u20130.4 \u20131.1 0.1 0.2 0.1 \u20130.1 0.0 0.0 0.0 0.1 0.6 \u20130.3 \u20130.4 \u20130.2 0.2 0.5 0.0 0.0 0.0 2.5 3.4 2.9 4.3 \u20132.0 \u20134.1 2.6 2.8 3.7 . . . 4.6 . . . 2.6 3.0 5.0 6.2 5.9 7.0 3.5 1.0 1.9 0.8 1.1 . . . 2.7 . . . 3.1 0.5 4.1 4.9 4.1 7.1 2.8 2.0 1.9 2.2 1.9 . . . 3.5 . . . 2.9 1.8 Memorandum World Growth Based on Market Exchange Rates European Union ASEAN-5 5/ Middle East and North Africa Emerging Market and Middle-Income Economies Low-Income Developing Countries 6.0 5.5 3.8 4.1 7.0 4.1 3.1 3.7 5.2 5.4 3.8 4.9 2.4 0.7 4.3 3.2 4.0 4.9 2.5 1.8 4.7 3.5 4.1 5.6 0.3 0.0 \u20130.2 \u20130.4 0.4 0.0 \u20130.1 \u20130.3 \u20130.2 0.2 0.0 0.1 1.7 1.8 3.7 . . . 2.5 . . . 2.5 1.2 5.7 . . . 5.0 . . . 2.5 2.0 4.0 . . . 4.1 . . . 
10.4 9.4 12.1 5.4 6.6 3.4 2.4 2.3 2.6 3.4 2.7 4.6 \u20130.1 0.0 \u20130.3 \u20130.3 \u20130.4 0.0 . . . . . . . . . . . . . . . . . . . . . . . . . . . 65.8 26.4 39.8 7.0 \u201316.2 \u20136.3 \u20137.1 \u20130.4 \u20133.3 \u20130.1 \u20130.9 0.3 11.2 \u20132.0 \u20139.8 1.4 \u20135.9 \u20130.2", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 7 - }, - "text": "Year over Year Difference from October 2022 Q4 over Q4 2/ 2021 Estimate 2022 Projections 2023 2024 WEO Projections 1/ 2023 2024 Estimate 2022 Projections 2023 2024 6.2 3.4 2.9 3.1 0.2 –0.1 1.9 3.2 3.0 Advanced Economies United States Euro Area Germany France Italy Spain Japan United Kingdom Canada Other Advanced Economies 3/ 5.4 5.9 5.3 2.6 6.8 6.7 5.5 2.1 7.6 5.0 5.3 2.7 2.0 3.5 1.9 2.6 3.9 5.2 1.4 4.1 3.5 2.8 1.2 1.4 0.7 0.1 0.7 0.6 1.1 1.8 –0.6 1.5 2.0 1.4 1.0 1.6 1.4 1.6 0.9 2.4 0.9 0.9 1.5 2.4 0.1 0.4 0.2 0.4 0.0 0.8 –0.1 0.2 –0.9 0.0 –0.3 –0.2 –0.2 –0.2 –0.1 0.0 –0.4 –0.2 –0.4 0.3 –0.1 –0.2 1.3 0.7 1.9 1.4 0.5 2.1 2.1 1.7 0.4 2.3 1.4 1.1 1.0 0.5 0.0 0.9 0.1 1.3 1.0 –0.5 1.2 2.1 1.6 1.3 2.1 2.3 1.8 1.0 2.8 1.0 1.8 1.9 2.2 Emerging Market and Developing Economies Emerging and Developing Asia China India 4/ Emerging and Developing Europe Russia Latin America and the Caribbean Brazil Mexico Middle East and Central Asia Saudi Arabia Sub-Saharan Africa Nigeria South Africa 6.7 7.4 8.4 8.7 6.9 4.7 7.0 5.0 4.7 4.5 3.2 4.7 3.6 4.9 3.9 4.3 3.0 6.8 0.7 –2.2 3.9 3.1 3.1 5.3 8.7 3.8 3.0 2.6 4.0 5.3 5.2 6.1 1.5 0.3 1.8 1.2 1.7 3.2 2.6 3.8 3.2 1.2 4.2 5.2 4.5 6.8 2.6 2.1 2.1 1.5 1.6 3.7 3.4 4.1 2.9 1.3 0.3 0.4 0.8 0.0 0.9 2.6 0.1 0.2 0.5 –0.4 –1.1 0.1 0.2 0.1 –0.1 0.0 0.0 0.0 0.1 0.6 –0.3 –0.4 –0.2 0.2 0.5 0.0 0.0 0.0 2.5 3.4 2.9 4.3 –2.0 –4.1 2.6 2.8 3.7 . . . 4.6 . . . 2.6 3.0 5.0 6.2 5.9 7.0 3.5 1.0 1.9 0.8 1.1 . . . 2.7 . . . 3.1 0.5 4.1 4.9 4.1 7.1 2.8 2.0 1.9 2.2 1.9 . . . 3.5 . . . 
2.9 1.8 Memorandum World Growth Based on Market Exchange Rates European Union ASEAN-5 5/ Middle East and North Africa Emerging Market and Middle-Income Economies Low-Income Developing Countries 6.0 5.5 3.8 4.1 7.0 4.1 3.1 3.7 5.2 5.4 3.8 4.9 2.4 0.7 4.3 3.2 4.0 4.9 2.5 1.8 4.7 3.5 4.1 5.6 0.3 0.0 –0.2 –0.4 0.4 0.0 –0.1 –0.3 –0.2 0.2 0.0 0.1 1.7 1.8 3.7 . . . 2.5 . . . 2.5 1.2 5.7 . . . 5.0 . . . 2.5 2.0 4.0 . . . 4.1 . . . 10.4 9.4 12.1 5.4 6.6 3.4 2.4 2.3 2.6 3.4 2.7 4.6 –0.1 0.0 –0.3 –0.3 –0.4 0.0 . . . . . . . . . . . . . . . . . . . . . . . . . . . 65.8 26.4 39.8 7.0 –16.2 –6.3 –7.1 –0.4 –3.3 –0.1 –0.9 0.3 11.2 –2.0 –9.8 1.4 –5.9 –0.2", - "type": "Table" - }, - { - "element_id": "2d46f6faafe2544549dd92fcbe07addd", - "metadata": { + "page_number": 7, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "Title", + "element_id": "2d46f6faafe2544549dd92fcbe07addd", + "text": "World Output", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 7 - }, - "text": "World Output", - "type": "Title" - }, - { - "element_id": "8c0af04bc7a87b5013697ac78410d6e3", - "metadata": { + "page_number": 7, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": 
"utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "UncategorizedText", + "element_id": "8c0af04bc7a87b5013697ac78410d6e3", + "text": "World Trade Volume (goods and services) 6/ Advanced Economies Emerging Market and Developing Economies", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 7 - }, - "text": "World Trade Volume (goods and services) 6/ Advanced Economies Emerging Market and Developing Economies", - "type": "UncategorizedText" - }, - { - "element_id": "61cc155370f47a3bdda30c407ce2958b", - "metadata": { + "page_number": 7, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "61cc155370f47a3bdda30c407ce2958b", + "text": "Commodity Prices Oil 7/ Nonfuel (average based on world commodity import weights)", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 7 - }, - "text": "Commodity Prices Oil 7/ Nonfuel (average based on world commodity import weights)", - "type": "NarrativeText" - }, - { - "element_id": "7b87f70df5eb0c5c5f4e05cb89393628", - "metadata": { + 
"page_number": 7, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "UncategorizedText", + "element_id": "7b87f70df5eb0c5c5f4e05cb89393628", + "text": "World Consumer Prices 8/ Advanced Economies 9/ Emerging Market and Developing Economies 8/", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 7 - }, - "text": "World Consumer Prices 8/ Advanced Economies 9/ Emerging Market and Developing Economies 8/", - "type": "UncategorizedText" - }, - { - "element_id": "01f6110f227ca362f21307a252d387bc", - "metadata": { + "page_number": 7, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "UncategorizedText", + "element_id": "01f6110f227ca362f21307a252d387bc", + "text": "4.7 3.1 5.9", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 7 - }, - "text": 
"4.7 3.1 5.9", - "type": "UncategorizedText" - }, - { - "element_id": "54dc7fe009c437d116108cec181e3792", - "metadata": { + "page_number": 7, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "UncategorizedText", + "element_id": "54dc7fe009c437d116108cec181e3792", + "text": "8.8 7.3 9.9", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 7 - }, - "text": "8.8 7.3 9.9", - "type": "UncategorizedText" - }, - { - "element_id": "348fa41c29526b8d4933fa0492af810e", - "metadata": { + "page_number": 7, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "UncategorizedText", + "element_id": "348fa41c29526b8d4933fa0492af810e", + "text": "6.6 4.6 8.1", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 7 - }, - "text": "6.6 4.6 8.1", - "type": 
"UncategorizedText" - }, - { - "element_id": "6aabe10eb8cae2bc4de874c542565ec1", - "metadata": { + "page_number": 7, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "UncategorizedText", + "element_id": "6aabe10eb8cae2bc4de874c542565ec1", + "text": "4.3 2.6 5.5", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 7 - }, - "text": "4.3 2.6 5.5", - "type": "UncategorizedText" - }, - { - "element_id": "41242d1e0d075b14e1153a9a6eac1abc", - "metadata": { + "page_number": 7, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "UncategorizedText", + "element_id": "41242d1e0d075b14e1153a9a6eac1abc", + "text": "0.1 0.2 0.0", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 7 - }, - "text": "0.1 0.2 0.0", - "type": "UncategorizedText" - }, - { - 
"element_id": "553fd3a7a662ec2190665ed75ae70c65", - "metadata": { + "page_number": 7, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "UncategorizedText", + "element_id": "553fd3a7a662ec2190665ed75ae70c65", + "text": "0.2 0.2 0.2", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 7 - }, - "text": "0.2 0.2 0.2", - "type": "UncategorizedText" - }, - { - "element_id": "42d777ee314b8f164aabb1976e185638", - "metadata": { + "page_number": 7, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "UncategorizedText", + "element_id": "42d777ee314b8f164aabb1976e185638", + "text": "9.2 7.8 10.4", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 7 - }, - "text": "9.2 7.8 10.4", - "type": "UncategorizedText" - }, - { - "element_id": 
"b536aeb880e2566ac393af151f8a53c0", - "metadata": { + "page_number": 7, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "UncategorizedText", + "element_id": "b536aeb880e2566ac393af151f8a53c0", + "text": "5.0 3.1 6.6", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 7 - }, - "text": "5.0 3.1 6.6", - "type": "UncategorizedText" - }, - { - "element_id": "2ccb29d3db680f09050e16da013cfa4c", - "metadata": { + "page_number": 7, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "2ccb29d3db680f09050e16da013cfa4c", + "text": "Note: Real effective exchange rates are assumed to remain constant at the levels prevailing during October 26, 2022--November 23, 2022. Economies are listed on the basis of economic size. 
The aggregated quarterly data are seasonally adjusted. WEO = World Economic Outlook. 1/ Difference based on rounded figures for the current and October 2022 WEO forecasts. Countries whose forecasts have been updated relative to October 2022 WEO forecasts account for approximately 90 percent of world GDP measured at purchasing-power-parity weights. 2/ For World Output (Emerging Market and Developing Economies), the quarterly estimates and projections account for approximately 90 percent (80 percent) of annual world (emerging market and developing economies') output at purchasing-power-parity weights. 3/ Excludes the Group of Seven (Canada, France, Germany, Italy, Japan, United Kingdom, United States) and euro area countries. 4/ For India, data and projections are presented on a fiscal year basis, with FY 2022/23 (starting in April 2022) shown in the 2022 column. India's growth projections are 5.4 percent in 2023 and 6.8 percent in 2024 based on calendar year. 5/ Indonesia, Malaysia, Philippines, Singapore, Thailand. 6/ Simple average of growth rates for export and import volumes (goods and services). 7/ Simple average of prices of UK Brent, Dubai Fateh, and West Texas Intermediate crude oil. The average assumed price of oil in US dollars a barrel, based on futures markets (as of November 29, 2022), is $81.13 in 2023 and $75.36 in 2024. 8/ Excludes Venezuela. 9/ The inflation rate for the euro area is 5.7% in 2023 and 3.3% in 2024, that for Japan is 2.8% in 2023 and 2.0% in 2024, and that for the United States is 4.0% in 2023 and 2.2% in 2024.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 7 - }, - "text": "Note: Real effective exchange rates are assumed to remain constant at the levels prevailing during October 26, 2022--November 23, 2022. Economies are listed on the basis of economic size. The aggregated quarterly data are seasonally adjusted. WEO = World Economic Outlook. 
1/ Difference based on rounded figures for the current and October 2022 WEO forecasts. Countries whose forecasts have been updated relative to October 2022 WEO forecasts account for approximately 90 percent of world GDP measured at purchasing-power-parity weights. 2/ For World Output (Emerging Market and Developing Economies), the quarterly estimates and projections account for approximately 90 percent (80 percent) of annual world (emerging market and developing economies') output at purchasing-power-parity weights. 3/ Excludes the Group of Seven (Canada, France, Germany, Italy, Japan, United Kingdom, United States) and euro area countries. 4/ For India, data and projections are presented on a fiscal year basis, with FY 2022/23 (starting in April 2022) shown in the 2022 column. India's growth projections are 5.4 percent in 2023 and 6.8 percent in 2024 based on calendar year. 5/ Indonesia, Malaysia, Philippines, Singapore, Thailand. 6/ Simple average of growth rates for export and import volumes (goods and services). 7/ Simple average of prices of UK Brent, Dubai Fateh, and West Texas Intermediate crude oil. The average assumed price of oil in US dollars a barrel, based on futures markets (as of November 29, 2022), is $81.13 in 2023 and $75.36 in 2024. 8/ Excludes Venezuela. 
9/ The inflation rate for the euro area is 5.7% in 2023 and 3.3% in 2024, that for Japan is 2.8% in 2023 and 2.0% in 2024, and that for the United States is 4.0% in 2023 and 2.2% in 2024.", - "type": "NarrativeText" - }, - { - "element_id": "fcdbec90e78c273dd191e6938b63b3aa", - "metadata": { + "page_number": 7, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "fcdbec90e78c273dd191e6938b63b3aa", + "text": "Upside risks\u2014Plausible upside risks include more favorable surprises to domestic spending\u2014as in the third quarter of 2022\u2014which, however, would increase inflation further. At the same time, there is room for an upside scenario with lower-than-expected inflation and less monetary tightening:", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 7 - }, - "text": "Upside risks—Plausible upside risks include more favorable surprises to domestic spending—as in the third quarter of 2022—which, however, would increase inflation further. 
At the same time, there is room for an upside scenario with lower-than-expected inflation and less monetary tightening:", - "type": "NarrativeText" - }, - { - "element_id": "05b94a59813751cc052b233294eea3bf", - "metadata": { + "page_number": 7, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "ListItem", + "element_id": "05b94a59813751cc052b233294eea3bf", + "text": "support and, in many cases, still-tight labor markets and solid wage growth, pent-up demand remains an upside risk to the growth outlook. In some advanced economies, recent data show that households are still on net adding to their stock of excess savings (as in some euro area countries and the United Kingdom) or have ample savings left (as in the United States). This leaves scope for a further boost to consumption\u2014particularly of services, including tourism.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 7 - }, - "text": "support and, in many cases, still-tight labor markets and solid wage growth, pent-up demand remains an upside risk to the growth outlook. In some advanced economies, recent data show that households are still on net adding to their stock of excess savings (as in some euro area countries and the United Kingdom) or have ample savings left (as in the United States). 
This leaves scope for a further boost to consumption—particularly of services, including tourism.", - "type": "ListItem" - }, - { - "element_id": "a92c9c37dd45ab95af9ee011a431bbfd", - "metadata": { + "page_number": 7, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "ListItem", + "element_id": "a92c9c37dd45ab95af9ee011a431bbfd", + "text": "6 International Monetary Fund | January 2023", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 7 - }, - "text": "6 International Monetary Fund | January 2023", - "type": "ListItem" - }, - { - "element_id": "36b409ff8f7f08da8322fb6945b054ef", - "metadata": { + "page_number": 7, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "UncategorizedText", + "element_id": "36b409ff8f7f08da8322fb6945b054ef", + "text": "3.5 2.3 4.5", + "metadata": { 
"filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 7 - }, - "text": "3.5 2.3 4.5", - "type": "UncategorizedText" - }, - { - "element_id": "9d90e4fc536bda02ef1167255c84eed7", - "metadata": { + "page_number": 7, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "Header", + "element_id": "9d90e4fc536bda02ef1167255c84eed7", + "text": "WORLD ECONOMIC OUTLOOK UPDATE, JANUARY 2023", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 8 - }, - "text": "WORLD ECONOMIC OUTLOOK UPDATE, JANUARY 2023", - "type": "Header" - }, - { - "element_id": "b20c66b46dd6311d2a324c0156315353", - "metadata": { + "page_number": 8, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "b20c66b46dd6311d2a324c0156315353", + "text": "However, the boost to demand 
could stoke core inflation, leading to even tighter monetary policies and a stronger-than-expected slowdown later on. Pent-up demand could also fuel a stronger rebound in China.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 8 - }, - "text": "However, the boost to demand could stoke core inflation, leading to even tighter monetary policies and a stronger-than-expected slowdown later on. Pent-up demand could also fuel a stronger rebound in China.", - "type": "NarrativeText" - }, - { - "element_id": "71a1e86ca0ab0fa7c75ad3568285eda6", - "metadata": { + "page_number": 8, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "ListItem", + "element_id": "71a1e86ca0ab0fa7c75ad3568285eda6", + "text": "Faster disinflation: An easing in labor market pressures in some advanced economies due to falling vacancies could cool wage inflation without necessarily increasing unemployment. A sharp fall in the prices of goods, as consumers shift back to services, could further push down inflation. Such developments could imply a \u201csofter\u201d landing with less monetary tightening.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 8 - }, - "text": "Faster disinflation: An easing in labor market pressures in some advanced economies due to falling vacancies could cool wage inflation without necessarily increasing unemployment. 
A sharp fall in the prices of goods, as consumers shift back to services, could further push down inflation. Such developments could imply a “softer” landing with less monetary tightening.", - "type": "ListItem" - }, - { - "element_id": "36c4ce26347dd1aae7029596e0cb3e96", - "metadata": { + "page_number": 8, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "36c4ce26347dd1aae7029596e0cb3e96", + "text": "Downside risks\u2014Numerous downside risks continue to weigh on the global outlook, lowering growth while, in a number of cases, adding further to inflation:", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 8 - }, - "text": "Downside risks—Numerous downside risks continue to weigh on the global outlook, lowering growth while, in a number of cases, adding further to inflation:", - "type": "NarrativeText" - }, - { - "element_id": "9d5064113562605b7a1b3f1bc24840a4", - "metadata": { + "page_number": 8, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": 
"s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "ListItem", + "element_id": "9d5064113562605b7a1b3f1bc24840a4", + "text": "capacity, especially outside the major urban areas, significant health consequences could hamper the recovery. A deepening crisis in the real estate market remains a major source of vulnerability, with risks of widespread defaults by developers and resulting financial sector instability. Spillovers to the rest of the world would operate primarily through lower demand and potentially renewed supply chain problems.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 8 - }, - "text": "capacity, especially outside the major urban areas, significant health consequences could hamper the recovery. A deepening crisis in the real estate market remains a major source of vulnerability, with risks of widespread defaults by developers and resulting financial sector instability. 
Spillovers to the rest of the world would operate primarily through lower demand and potentially renewed supply chain problems.", - "type": "ListItem" - }, - { - "element_id": "a020d627fc081a56453ab6b3ee8c0881", - "metadata": { + "page_number": 8, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "ListItem", + "element_id": "a020d627fc081a56453ab6b3ee8c0881", + "text": "vulnerability, particularly for Europe and lower-income countries. Europe is facing lower-than- anticipated gas prices, having stored enough gas to make shortages unlikely this winter. However, refilling storage with much-diminished Russian flows will be challenging ahead of next winter, particularly if it is a very cold one and China\u2019s energy demand picks up, causing price spikes. A possible increase in food prices from a failed extension of the Black Sea grain initiative would put further pressure on lower-income countries that are experiencing food insecurity and have limited budgetary room to cushion the impact on households and businesses. With elevated food and fuel prices, social unrest may increase.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 8 - }, - "text": "vulnerability, particularly for Europe and lower-income countries. Europe is facing lower-than- anticipated gas prices, having stored enough gas to make shortages unlikely this winter. 
However, refilling storage with much-diminished Russian flows will be challenging ahead of next winter, particularly if it is a very cold one and China’s energy demand picks up, causing price spikes. A possible increase in food prices from a failed extension of the Black Sea grain initiative would put further pressure on lower-income countries that are experiencing food insecurity and have limited budgetary room to cushion the impact on households and businesses. With elevated food and fuel prices, social unrest may increase.", - "type": "ListItem" - }, - { - "element_id": "5a4931739cd615032ea03860c3ace150", - "metadata": { + "page_number": 8, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "5a4931739cd615032ea03860c3ace150", + "text": "Debt distress: Since October, sovereign spreads for emerging market and developing economies have modestly declined on the back of an easing in global financial conditions (Box 1) and dollar depreciation. About 15 percent of low-income countries are estimated to be in debt distress, with an additional 45 percent at high risk of debt distress and about 25 percent of emerging market economies also at high risk. The combination of high debt levels from the pandemic, lower growth, and higher borrowing costs exacerbates the vulnerability of these economies, especially those with significant near-term dollar financing needs. 
Inflation persisting: Persistent labor market tightness could translate into stronger-than-expected wage growth. Higher-than-expected oil, gas, and food prices from the war in Ukraine or from a faster rebound in China\u2019s growth could again raise headline inflation and pass through into underlying inflation. Such developments could cause inflation expectations to de-anchor and require an even tighter monetary policy.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 8 - }, - "text": "Debt distress: Since October, sovereign spreads for emerging market and developing economies have modestly declined on the back of an easing in global financial conditions (Box 1) and dollar depreciation. About 15 percent of low-income countries are estimated to be in debt distress, with an additional 45 percent at high risk of debt distress and about 25 percent of emerging market economies also at high risk. The combination of high debt levels from the pandemic, lower growth, and higher borrowing costs exacerbates the vulnerability of these economies, especially those with significant near-term dollar financing needs. Inflation persisting: Persistent labor market tightness could translate into stronger-than-expected wage growth. Higher-than-expected oil, gas, and food prices from the war in Ukraine or from a faster rebound in China’s growth could again raise headline inflation and pass through into underlying inflation. 
Such developments could cause inflation expectations to de-anchor and require an even tighter monetary policy.", - "type": "NarrativeText" - }, - { - "element_id": "5c53b2d3f514a92cc6c099fd9a46b49a", - "metadata": { + "page_number": 8, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "ListItem", + "element_id": "5c53b2d3f514a92cc6c099fd9a46b49a", + "text": "Sudden financial market repricing: A premature easing in financial conditions in response to lower headline inflation data could complicate anti-inflation policies and necessitate additional monetary tightening. For the same reason, unfavorable inflation data releases could trigger sudden repricing of assets and increase volatility in financial markets. Such movements could strain liquidity and the functioning of critical markets, with ripple effects on the real economy.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 8 - }, - "text": "Sudden financial market repricing: A premature easing in financial conditions in response to lower headline inflation data could complicate anti-inflation policies and necessitate additional monetary tightening. For the same reason, unfavorable inflation data releases could trigger sudden repricing of assets and increase volatility in financial markets. 
Such movements could strain liquidity and the functioning of critical markets, with ripple effects on the real economy.", - "type": "ListItem" - }, - { - "element_id": "e3c931f10a12b33fb9aeb6427a16c7ae", - "metadata": { + "page_number": 8, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "e3c931f10a12b33fb9aeb6427a16c7ae", + "text": "Geopolitical fragmentation: The war in Ukraine and the related international sanctions aimed at pressuring Russia to end hostilities are splitting the world economy into blocs and reinforcing", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 8 - }, - "text": "Geopolitical fragmentation: The war in Ukraine and the related international sanctions aimed at pressuring Russia to end hostilities are splitting the world economy into blocs and reinforcing", - "type": "NarrativeText" - }, - { - "element_id": "84d70cebf2068479ad03b1f4fca89141", - "metadata": { + "page_number": 8, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": 
"s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "ListItem", + "element_id": "84d70cebf2068479ad03b1f4fca89141", + "text": "earlier geopolitical tensions, such as those associated with the US-China trade dispute.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 8 - }, - "text": "earlier geopolitical tensions, such as those associated with the US-China trade dispute.", - "type": "ListItem" - }, - { - "element_id": "9fda15cda44df99e579f024b037349d9", - "metadata": { + "page_number": 8, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "ListItem", + "element_id": "9fda15cda44df99e579f024b037349d9", + "text": "International Monetary Fund | January 2023. 7", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 8 - }, - "text": "International Monetary Fund | January 2023. 
7", - "type": "ListItem" - }, - { - "element_id": "8e25ec572941a6a14bbab64312dd35a2", - "metadata": { + "page_number": 8, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "Header", + "element_id": "8e25ec572941a6a14bbab64312dd35a2", + "text": "WORLD ECONOMIC OUTLOOK UPDATE, JANUARY 2023", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 9 - }, - "text": "WORLD ECONOMIC OUTLOOK UPDATE, JANUARY 2023", - "type": "Header" - }, - { - "element_id": "1e6c82906e4e6d414004cb6af55cfdd8", - "metadata": { + "page_number": 9, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "1e6c82906e4e6d414004cb6af55cfdd8", + "text": "Fragmentation could intensify\u2014with more restrictions on cross-border movements of capital, workers, and international 
payments\u2014and could hamper multilateral cooperation on providing global public goods.1 The costs of such fragmentation are especially high in the short term, as replacing disrupted cross-border flows takes time.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 9 - }, - "text": "Fragmentation could intensify—with more restrictions on cross-border movements of capital, workers, and international payments—and could hamper multilateral cooperation on providing global public goods.1 The costs of such fragmentation are especially high in the short term, as replacing disrupted cross-border flows takes time.", - "type": "NarrativeText" - }, - { - "element_id": "72c775cd8adaa0370bdbb1bc30c57907", - "metadata": { + "page_number": 9, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "Title", + "element_id": "72c775cd8adaa0370bdbb1bc30c57907", + "text": "Policy Priorities", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 9 - }, - "text": "Policy Priorities", - "type": "Title" - }, - { - "element_id": "975bc3e42433eb8c66e588bea7cd7510", - "metadata": { + "page_number": 9, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - 
"remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "975bc3e42433eb8c66e588bea7cd7510", + "text": "Securing global disinflation: For most economies, the priority remains achieving a sustained reduction in inflation toward target levels. Raising real policy rates and keeping them above their neutral levels until underlying inflation is clearly declining would ward off risks of inflation expectations de- anchoring. Clear central bank communication and appropriate reactions to shifts in the data will help keep inflation expectations anchored and lessen wage and price pressures. Central banks\u2019 balance sheets will need to be unwound carefully, amid market liquidity risks. Gradual and steady fiscal tightening would contribute to cooling demand and limit the burden on monetary policy in the fight against inflation. In countries where output remains below potential and inflation is in check, maintaining monetary and fiscal accommodation may be appropriate.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 9 - }, - "text": "Securing global disinflation: For most economies, the priority remains achieving a sustained reduction in inflation toward target levels. Raising real policy rates and keeping them above their neutral levels until underlying inflation is clearly declining would ward off risks of inflation expectations de- anchoring. Clear central bank communication and appropriate reactions to shifts in the data will help keep inflation expectations anchored and lessen wage and price pressures. 
Central banks’ balance sheets will need to be unwound carefully, amid market liquidity risks. Gradual and steady fiscal tightening would contribute to cooling demand and limit the burden on monetary policy in the fight against inflation. In countries where output remains below potential and inflation is in check, maintaining monetary and fiscal accommodation may be appropriate.", - "type": "NarrativeText" - }, - { - "element_id": "aa7909a0b52db93a6f10d7fd56f76550", - "metadata": { + "page_number": 9, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "aa7909a0b52db93a6f10d7fd56f76550", + "text": "Containing the reemergence of COVID-19: Addressing the ongoing pandemic requires coordinated efforts to boost vaccination and medicine access in countries where coverage remains low as well as the deployment of pandemic preparedness measures\u2014including a global push toward sequencing and sharing data. 
In China, focusing vaccination efforts on vulnerable groups and maintaining sufficiently high coverage of boosters and antiviral medicines would minimize the risks of severe health outcomes and safeguard the recovery, with favorable cross-border spillovers.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 9 - }, - "text": "Containing the reemergence of COVID-19: Addressing the ongoing pandemic requires coordinated efforts to boost vaccination and medicine access in countries where coverage remains low as well as the deployment of pandemic preparedness measures—including a global push toward sequencing and sharing data. In China, focusing vaccination efforts on vulnerable groups and maintaining sufficiently high coverage of boosters and antiviral medicines would minimize the risks of severe health outcomes and safeguard the recovery, with favorable cross-border spillovers.", - "type": "NarrativeText" - }, - { - "element_id": "ca07b88dc1247f54719122a49cc5fd59", - "metadata": { + "page_number": 9, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "ca07b88dc1247f54719122a49cc5fd59", + "text": "Ensuring financial stability: Depending on country circumstances, macroprudential tools can be used to tackle pockets of elevated financial sector vulnerabilities. 
Monitoring housing sector developments and conducting stress tests in economies where house prices have increased significantly over the past few years are warranted. In China, central government action to resolve the property crisis and reduce the risk of spillovers to financial stability and growth is a priority, including by strengthening temporary mechanisms to protect presale homebuyers from the risk of non-delivery and by restructuring troubled developers. Globally, financial sector regulations introduced after the global financial crisis have contributed to the resilience of banking sectors throughout the pandemic, but there is a need to address data and supervisory gaps in the less-regulated nonbank financial sector, where risks may have built up inconspicuously. Recent turmoil in the crypto space also highlights the urgent need to introduce common standards and reinforce oversight of crypto assets.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 9 - }, - "text": "Ensuring financial stability: Depending on country circumstances, macroprudential tools can be used to tackle pockets of elevated financial sector vulnerabilities. Monitoring housing sector developments and conducting stress tests in economies where house prices have increased significantly over the past few years are warranted. In China, central government action to resolve the property crisis and reduce the risk of spillovers to financial stability and growth is a priority, including by strengthening temporary mechanisms to protect presale homebuyers from the risk of non-delivery and by restructuring troubled developers. Globally, financial sector regulations introduced after the global financial crisis have contributed to the resilience of banking sectors throughout the pandemic, but there is a need to address data and supervisory gaps in the less-regulated nonbank financial sector, where risks may have built up inconspicuously. 
Recent turmoil in the crypto space also highlights the urgent need to introduce common standards and reinforce oversight of crypto assets.", - "type": "NarrativeText" - }, - { - "element_id": "328214785cd96977b4cbbd5a778942d1", - "metadata": { + "page_number": 9, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "328214785cd96977b4cbbd5a778942d1", + "text": "Restoring debt sustainability: Lower growth and higher borrowing costs have raised public debt ratios in several economies. Where debt is unsustainable, implementing restructuring or reprofiling early on as part of a package of reforms (including fiscal consolidation and growth-enhancing supply-side reforms) can avert the need for more disruptive adjustment later.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 9 - }, - "text": "Restoring debt sustainability: Lower growth and higher borrowing costs have raised public debt ratios in several economies. 
Where debt is unsustainable, implementing restructuring or reprofiling early on as part of a package of reforms (including fiscal consolidation and growth-enhancing supply-side reforms) can avert the need for more disruptive adjustment later.", - "type": "NarrativeText" - }, - { - "element_id": "3a2ff2baa28f7160cee0e8c2cc842661", - "metadata": { + "page_number": 9, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "3a2ff2baa28f7160cee0e8c2cc842661", + "text": "Supporting the vulnerable: The surge in global energy and food prices triggered a cost-of-living crisis. Governments acted swiftly with support to households and firms, which helped cushion effects on growth and at times limited the pass-through from energy prices to headline inflation through price", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 9 - }, - "text": "Supporting the vulnerable: The surge in global energy and food prices triggered a cost-of-living crisis. 
Governments acted swiftly with support to households and firms, which helped cushion effects on growth and at times limited the pass-through from energy prices to headline inflation through price", - "type": "NarrativeText" - }, - { - "element_id": "3601c468c40a282c19c8abdcca4add1b", - "metadata": { + "page_number": 9, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "3601c468c40a282c19c8abdcca4add1b", + "text": "1 See \u201cGeo-Economic Fragmentation and the Future of Multilateralism,\u201d IMF Staff Discussion Note 2023/001.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 9 - }, - "text": "1 See “Geo-Economic Fragmentation and the Future of Multilateralism,” IMF Staff Discussion Note 2023/001.", - "type": "NarrativeText" + "page_number": 9, + "data_source": { + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", + "record_locator": { + "protocol": "s3", + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" + }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } }, { + "type": "Footer", "element_id": "4480feb00164d7db4e269a2adc4f73d2", + "text": "8 International Monetary Fund | January 2023", "metadata": { - "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", - "record_locator": { - 
"protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" - }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 9 - }, - "text": "8 International Monetary Fund | January 2023", - "type": "Footer" - }, - { - "element_id": "0f9c4deaa35a06e367d2460d890b3dad", - "metadata": { + "page_number": 9, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "Header", + "element_id": "0f9c4deaa35a06e367d2460d890b3dad", + "text": "WORLD ECONOMIC OUTLOOK UPDATE, JANUARY 2023", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 10 - }, - "text": "WORLD ECONOMIC OUTLOOK UPDATE, JANUARY 2023", - "type": "Header" - }, - { - "element_id": "426f6eb8bbe7d2f5fe507b439d2ebc2b", - "metadata": { + "page_number": 10, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": 
"s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "426f6eb8bbe7d2f5fe507b439d2ebc2b", + "text": "controls. The temporary and broad-based measures are becoming increasingly costly and should be withdrawn and replaced by targeted approaches. Preserving the energy price signal will encourage a reduction in energy consumption and limit the risks of shortages. Targeting can be achieved through social safety nets such as cash transfers to eligible households based on income or demographics or by transfers through electricity companies based on past energy consumption. Subsidies should be temporary and offset by revenue-generating measures, including one-time solidarity taxes on high- income households and companies, where appropriate.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 10 - }, - "text": "controls. The temporary and broad-based measures are becoming increasingly costly and should be withdrawn and replaced by targeted approaches. Preserving the energy price signal will encourage a reduction in energy consumption and limit the risks of shortages. Targeting can be achieved through social safety nets such as cash transfers to eligible households based on income or demographics or by transfers through electricity companies based on past energy consumption. 
Subsidies should be temporary and offset by revenue-generating measures, including one-time solidarity taxes on high- income households and companies, where appropriate.", - "type": "NarrativeText" - }, - { - "element_id": "405cec1fe989a0ad8960b938355d4c7b", - "metadata": { + "page_number": 10, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "405cec1fe989a0ad8960b938355d4c7b", + "text": "Reinforcing supply: Supply-side policies could address the key structural factors impeding growth\u2014 including market power, rent seeking, rigid regulation and planning, and inefficient education\u2014and could help build resilience, reduce bottlenecks, and alleviate price pressures. A concerted push for investment along the supply chain of green energy technologies would bolster energy security and help advance progress on the green transition.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 10 - }, - "text": "Reinforcing supply: Supply-side policies could address the key structural factors impeding growth— including market power, rent seeking, rigid regulation and planning, and inefficient education—and could help build resilience, reduce bottlenecks, and alleviate price pressures. 
A concerted push for investment along the supply chain of green energy technologies would bolster energy security and help advance progress on the green transition.", - "type": "NarrativeText" - }, - { - "element_id": "c7a53c81c39c885df191bbf239165b3d", - "metadata": { + "page_number": 10, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "c7a53c81c39c885df191bbf239165b3d", + "text": "Strengthening multilateral cooperation\u2014Urgent action is needed to limit the risks stemming from geopolitical fragmentation and to ensure cooperation on fundamental areas of common interest:", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 10 - }, - "text": "Strengthening multilateral cooperation—Urgent action is needed to limit the risks stemming from geopolitical fragmentation and to ensure cooperation on fundamental areas of common interest:", - "type": "NarrativeText" - }, - { - "element_id": "65269b45848d66c7d8b45099ddf6a328", - "metadata": { + "page_number": 10, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": 
"s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "ListItem", + "element_id": "65269b45848d66c7d8b45099ddf6a328", + "text": "Restraining the pandemic: Global coordination is needed to resolve bottlenecks in the global", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 10 - }, - "text": "Restraining the pandemic: Global coordination is needed to resolve bottlenecks in the global", - "type": "ListItem" - }, - { - "element_id": "2ae3be4a3aa90a31eea5a5a306d3837c", - "metadata": { + "page_number": 10, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "2ae3be4a3aa90a31eea5a5a306d3837c", + "text": "distribution of vaccines and treatments. Public support for the development of new vaccine technologies and the design of systematic responses to future epidemics also remains essential. Addressing debt distress: Progress has been made for countries that requested debt treatment under the Group of Twenty\u2019s Common Framework initiative, and more will be needed to strengthen it. 
It is also necessary to agree on mechanisms to resolve debt in a broader set of economies, including middle-income countries that are not eligible under the Common Framework. Non\u2013 Paris Club and private creditors have a crucial role to play in ensuring coordinated, effective, and timely debt resolution processes.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 10 - }, - "text": "distribution of vaccines and treatments. Public support for the development of new vaccine technologies and the design of systematic responses to future epidemics also remains essential. Addressing debt distress: Progress has been made for countries that requested debt treatment under the Group of Twenty’s Common Framework initiative, and more will be needed to strengthen it. It is also necessary to agree on mechanisms to resolve debt in a broader set of economies, including middle-income countries that are not eligible under the Common Framework. Non– Paris Club and private creditors have a crucial role to play in ensuring coordinated, effective, and timely debt resolution processes.", - "type": "NarrativeText" - }, - { - "element_id": "45e6f88f4dabe97ff6778317c9249838", - "metadata": { + "page_number": 10, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "ListItem", + "element_id": "45e6f88f4dabe97ff6778317c9249838", + "text": "Strengthening global trade: 
Strengthening the global trading system would address risks associated with trade fragmentation. This can be achieved by rolling back restrictions on food exports and other essential items such as medicine, upgrading World Trade Organization (WTO) rules in critical areas such as agricultural and industrial subsidies, concluding and implementing new WTO-based agreements, and fully restoring the WTO dispute settlement system.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 10 - }, - "text": "Strengthening global trade: Strengthening the global trading system would address risks associated with trade fragmentation. This can be achieved by rolling back restrictions on food exports and other essential items such as medicine, upgrading World Trade Organization (WTO) rules in critical areas such as agricultural and industrial subsidies, concluding and implementing new WTO-based agreements, and fully restoring the WTO dispute settlement system.", - "type": "ListItem" - }, - { - "element_id": "366056038aacb60e1ce242fff64e54ba", - "metadata": { + "page_number": 10, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "ListItem", + "element_id": "366056038aacb60e1ce242fff64e54ba", + "text": "Using the global financial safety net: With the cascading of shocks to the global economy, using the global financial safety net to its fullest extent is appropriate, 
including by proactively utilizing the IMF\u2019s precautionary financial arrangements and channeling aid from the international community to low-income countries facing shocks.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 10 - }, - "text": "Using the global financial safety net: With the cascading of shocks to the global economy, using the global financial safety net to its fullest extent is appropriate, including by proactively utilizing the IMF’s precautionary financial arrangements and channeling aid from the international community to low-income countries facing shocks.", - "type": "ListItem" - }, - { - "element_id": "7325338aa370c261a2582cadd41fefb5", - "metadata": { + "page_number": 10, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "ListItem", + "element_id": "7325338aa370c261a2582cadd41fefb5", + "text": "Speeding the green transition: To meet governments\u2019 climate change goals, it is necessary to swiftly implement credible mitigation policies. International coordination on carbon pricing or equivalent policies would facilitate faster decarbonization. 
Global cooperation is needed to build resilience to climate shocks, including through aid to vulnerable countries.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 10 - }, - "text": "Speeding the green transition: To meet governments’ climate change goals, it is necessary to swiftly implement credible mitigation policies. International coordination on carbon pricing or equivalent policies would facilitate faster decarbonization. Global cooperation is needed to build resilience to climate shocks, including through aid to vulnerable countries.", - "type": "ListItem" - }, - { - "element_id": "fbb9304898eea6b4435134ec70696620", - "metadata": { + "page_number": 10, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "ListItem", + "element_id": "fbb9304898eea6b4435134ec70696620", + "text": "International Monetary Fund | January 2023 9", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 10 - }, - "text": "International Monetary Fund | January 2023 9", - "type": "ListItem" - }, - { - "element_id": "10ea5d031e3bc9c3e108ffdd979b856d", - "metadata": { + "page_number": 10, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": 
"utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "Image", + "element_id": "10ea5d031e3bc9c3e108ffdd979b856d", + "text": "BOX 1. GL AL FINANCIAL STABILITY UPDATE", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 11 - }, - "text": "BOX 1. GL AL FINANCIAL STABILITY UPDATE", - "type": "Image" - }, - { - "element_id": "07d29d53937f3f8bf76548a1f7c8b017", - "metadata": { + "page_number": 11, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "07d29d53937f3f8bf76548a1f7c8b017", + "text": "Overall, financial stability risks remain elevated as investors reassess their inflation and monetary policy outlook. Global financial conditions have eased somewhat since the October 2022 Global Financial Stability Report, driven largely by changing market expectations regarding the interest rate cycle (Figure 1.1). 
While the expected peak in policy rates\u2014the terminal rate\u2014has risen, markets now also expect the subsequent fall in rates will be significantly faster, and further, than what was forecast in October (Figure 1.2). As a result, global bond yields have recently declined, corporate spreads have tightened, and equity markets have rebounded. That said, central banks are likely to continue to tighten monetary policy to fight inflation, and concerns that this restrictive stance could tip the economy into a recession have increased in major advanced economies.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 11 - }, - "text": "Overall, financial stability risks remain elevated as investors reassess their inflation and monetary policy outlook. Global financial conditions have eased somewhat since the October 2022 Global Financial Stability Report, driven largely by changing market expectations regarding the interest rate cycle (Figure 1.1). While the expected peak in policy rates—the terminal rate—has risen, markets now also expect the subsequent fall in rates will be significantly faster, and further, than what was forecast in October (Figure 1.2). As a result, global bond yields have recently declined, corporate spreads have tightened, and equity markets have rebounded. 
That said, central banks are likely to continue to tighten monetary policy to fight inflation, and concerns that this restrictive stance could tip the economy into a recession have increased in major advanced economies.", - "type": "NarrativeText" - }, - { - "element_id": "f1dc7632c3c313cc34bc6a4f64af05b5", - "metadata": { + "page_number": 11, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "f1dc7632c3c313cc34bc6a4f64af05b5", + "text": "Figure 1.1. Global Financial Conditions: Selected Regions (Standard deviations from mean)", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 11 - }, - "text": "Figure 1.1. 
Global Financial Conditions: Selected Regions (Standard deviations from mean)", - "type": "NarrativeText" - }, - { - "element_id": "16c33c9e209b518305829584935190dd", - "metadata": { + "page_number": 11, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "Image", + "element_id": "16c33c9e209b518305829584935190dd", + "text": "7 6 5 4 United States Euro area China Other AEs Other EMs October 2022 GFSR 3 2 1 0 \u20131 \u20132 \u20133 2006 08 08 06 10 10 12 12 14 16 14 16 18 18 20 22 22 20 ", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 11 - }, - "text": "7 6 5 4 United States Euro area China Other AEs Other EMs October 2022 GFSR 3 2 1 0 –1 –2 –3 2006 08 08 06 10 10 12 12 14 16 14 16 18 18 20 22 22 20 ", - "type": "Image" - }, - { - "element_id": "f61c8f5157c0dedec7bd07b89b5818fd", - "metadata": { + "page_number": 11, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + 
"date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "FigureCaption", + "element_id": "f61c8f5157c0dedec7bd07b89b5818fd", + "text": "Sources: Bloomberg Finance L.P.; Haver Analytics; national data sources; and IMF staff calculations. Note: AEs = advanced economies; EMs = emerging markets. GFSR = Global Financial Stability Report.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 11 - }, - "text": "Sources: Bloomberg Finance L.P.; Haver Analytics; national data sources; and IMF staff calculations. Note: AEs = advanced economies; EMs = emerging markets. GFSR = Global Financial Stability Report.", - "type": "FigureCaption" - }, - { - "element_id": "fd3223759c188ca12a81861f36083bb9", - "metadata": { + "page_number": 11, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "fd3223759c188ca12a81861f36083bb9", + "text": "Slowing aggregate demand and weaker-than-expected inflation prints in some major advanced economies have prompted investors\u2019 anticipation of a further reduction in the pace of future policy rate hikes. Corporate earnings forecasts have been cut due to headwinds from slowing demand, and margins have contracted across most regions. In addition, survey-based probabilities of recession have been increasing, particularly in the United States and Europe. 
However, upside risks to the inflation outlook remain. Despite the recent moderation in headline inflation, core inflation remains stubbornly high across most regions, labor markets are still tight, energy prices remain pressured by Russia\u2019s ongoing war in Ukraine, and supply chain disruptions may reappear. To keep these risks in check, financial conditions will likely need to tighten further. If not, central banks may need to increase policy rates even more in order to achieve their inflation objectives.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 11 - }, - "text": "Slowing aggregate demand and weaker-than-expected inflation prints in some major advanced economies have prompted investors’ anticipation of a further reduction in the pace of future policy rate hikes. Corporate earnings forecasts have been cut due to headwinds from slowing demand, and margins have contracted across most regions. In addition, survey-based probabilities of recession have been increasing, particularly in the United States and Europe. However, upside risks to the inflation outlook remain. Despite the recent moderation in headline inflation, core inflation remains stubbornly high across most regions, labor markets are still tight, energy prices remain pressured by Russia’s ongoing war in Ukraine, and supply chain disruptions may reappear. To keep these risks in check, financial conditions will likely need to tighten further. 
If not, central banks may need to increase policy rates even more in order to achieve their inflation objectives.", - "type": "NarrativeText" - }, - { - "element_id": "9f931c3b54669a87892391846beda98c", - "metadata": { + "page_number": 11, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "9f931c3b54669a87892391846beda98c", + "text": "Figure 1.2. Market-Implied Expectations of Policy Rates (Percent)", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 11 - }, - "text": "Figure 1.2. 
Market-Implied Expectations of Policy Rates (Percent)", - "type": "NarrativeText" - }, - { - "element_id": "bffdca980631ced1f96ab886cf9dcf22", - "metadata": { + "page_number": 11, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "Image", + "element_id": "bffdca980631ced1f96ab886cf9dcf22", + "text": "Latest October 2022 GFSR 6 1. United States 2. Euro area 5 4 3 2 5 4 3 2 1 1 Oct. 22 Apr. 23 Oct. 23 Dec. 24 Dec. 26 Oct. 22 Apr. 23 Oct. 23 Dec. 24 Dec. 26 ", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 11 - }, - "text": "Latest October 2022 GFSR 6 1. United States 2. Euro area 5 4 3 2 5 4 3 2 1 1 Oct. 22 Apr. 23 Oct. 23 Dec. 24 Dec. 26 Oct. 22 Apr. 23 Oct. 23 Dec. 24 Dec. 
26 ", - "type": "Image" - }, - { - "element_id": "f787afd6d27e6bfdc89bd82044a417a6", - "metadata": { + "page_number": 11, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "f787afd6d27e6bfdc89bd82044a417a6", + "text": "Given the tension between rising recession risks and monetary policy uncertainty, markets have seen significant volatility. While many central banks in advanced economies have stepped down the size of hikes, they have also explicitly stated they will need to keep rates higher, for a longer period of time, to tamp down inflation. Risk assets could face significant declines if earnings retrench further or if investors reassess their outlook for monetary policy given central bank communications. Globally, the partial reversal of the dollar rally has contributed to recent easing due to improved risk appetite, and some emerging market central banks have paused tightening amid tentative signs that inflation may have peaked.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 11 - }, - "text": "Given the tension between rising recession risks and monetary policy uncertainty, markets have seen significant volatility. 
While many central banks in advanced economies have stepped down the size of hikes, they have also explicitly stated they will need to keep rates higher, for a longer period of time, to tamp down inflation. Risk assets could face significant declines if earnings retrench further or if investors reassess their outlook for monetary policy given central bank communications. Globally, the partial reversal of the dollar rally has contributed to recent easing due to improved risk appetite, and some emerging market central banks have paused tightening amid tentative signs that inflation may have peaked.", - "type": "NarrativeText" - }, - { - "element_id": "b0002e1597c04292e2e01d4e15cb0dd7", - "metadata": { + "page_number": 11, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "b0002e1597c04292e2e01d4e15cb0dd7", + "text": "Financial market volatility is expected to remain elevated and could be exacerbated by poor market liquidity. For some asset classes (such as US Treasuries), liquidity has deteriorated to the March 2020 lows of the COVID-19 pandemic. 
With the process of central bank balance sheet reduction (quantitative tightening) underway, market liquidity is expected to remain challenging.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 11 - }, - "text": "Financial market volatility is expected to remain elevated and could be exacerbated by poor market liquidity. For some asset classes (such as US Treasuries), liquidity has deteriorated to the March 2020 lows of the COVID-19 pandemic. With the process of central bank balance sheet reduction (quantitative tightening) underway, market liquidity is expected to remain challenging.", - "type": "NarrativeText" - }, - { - "element_id": "2a9dc522d08d54609f97f566911ceed1", - "metadata": { + "page_number": 11, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "ListItem", + "element_id": "2a9dc522d08d54609f97f566911ceed1", + "text": "10 \u2014 International Monetary Fund | January 2023", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 11 - }, - "text": "10 — International Monetary Fund | January 2023", - "type": "ListItem" - }, - { - "element_id": "103cccd2abc41bc4beb9e70dff33123a", - "metadata": { + "page_number": 11, "data_source": { - "date_modified": "2023-02-14T07:31:28+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", 
"record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", - "version": "265756457651539296174748931590365722430" - }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "103cccd2abc41bc4beb9e70dff33123a", + "text": "WEO Update \u00a9 2023 \u2022 ISBN: 979-8-40023-224-4", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 11 - }, - "text": "WEO Update © 2023 • ISBN: 979-8-40023-224-4", - "type": "NarrativeText" + "page_number": 11, + "data_source": { + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "c7eed4fc056b089a98f6a3ad9ec9373e", + "record_locator": { + "protocol": "s3", + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" + }, + "date_created": "1676359888.0", + "date_modified": "1676359888.0" + } + } } ] \ No newline at end of file diff --git a/test_unstructured_ingest/expected-structured-output/s3/Silent-Giant-(1).pdf.json b/test_unstructured_ingest/expected-structured-output/s3/Silent-Giant-(1).json similarity index 55% rename from test_unstructured_ingest/expected-structured-output/s3/Silent-Giant-(1).pdf.json rename to test_unstructured_ingest/expected-structured-output/s3/Silent-Giant-(1).json index b64acf678b..160ca011c0 100644 --- a/test_unstructured_ingest/expected-structured-output/s3/Silent-Giant-(1).pdf.json +++ b/test_unstructured_ingest/expected-structured-output/s3/Silent-Giant-(1).json @@ -1,2165 +1,2268 @@ [ { + "type": "Title", "element_id": "0128236cd54e6931c27ccea98ad6fc92", + "text": "WORLD NUCLEAR", "metadata": { - "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", - "record_locator": { - "protocol": "s3", - "remote_file_path": 
"utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 1 - }, - "text": "WORLD NUCLEAR", - "type": "Title" - }, - { - "element_id": "b0e17666072949c5c63fdec1d0e85d97", - "metadata": { + "page_number": 1, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "Image", + "element_id": "b0e17666072949c5c63fdec1d0e85d97", + "text": "", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 1 - }, - "text": "", - "type": "Image" - }, - { - "element_id": "618fa7e35cfb6cbe4ea708a3f9d12c65", - "metadata": { + "page_number": 1, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "Title", + "element_id": "618fa7e35cfb6cbe4ea708a3f9d12c65", 
+ "text": "The Silent Giant", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 1 - }, - "text": "The Silent Giant", - "type": "Title" - }, - { - "element_id": "78d1b366d03c8d58711dcf65c58cdc4c", - "metadata": { + "page_number": 1, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "78d1b366d03c8d58711dcf65c58cdc4c", + "text": "The need for nuclear in a clean energy system", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 1 - }, - "text": "The need for nuclear in a clean energy system", - "type": "NarrativeText" - }, - { - "element_id": "bd02aa501cdf1029587d15a3faa6f97c", - "metadata": { + "page_number": 1, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "Title", + "element_id": "bd02aa501cdf1029587d15a3faa6f97c", + "text": "//s88ciation", + "metadata": { 
"filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 1 - }, - "text": "//s88ciation", - "type": "Title" - }, - { - "element_id": "61dcf576097c58f62ae82967ec6528e3", - "metadata": { + "page_number": 1, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "Title", + "element_id": "61dcf576097c58f62ae82967ec6528e3", + "text": "Executive Summary", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 3 - }, - "text": "Executive Summary", - "type": "Title" - }, - { - "element_id": "4ad0ebfdd0c5031abf1410d785c5fb8d", - "metadata": { + "page_number": 3, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "4ad0ebfdd0c5031abf1410d785c5fb8d", + "text": "In a world centred on short-term fixes, many of the traits that make nuclear energy a key player in the transition to a sustainable world are not 
properly valued and often taken for granted. Reflecting on the popular discourse in the world of energy politics it would seem that renewables, and renewables alone, will be responsible for, and capable of, delivering a zero-carbon energy system \u2013 and that it is just a matter of time.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 3 - }, - "text": "In a world centred on short-term fixes, many of the traits that make nuclear energy a key player in the transition to a sustainable world are not properly valued and often taken for granted. Reflecting on the popular discourse in the world of energy politics it would seem that renewables, and renewables alone, will be responsible for, and capable of, delivering a zero-carbon energy system – and that it is just a matter of time.", - "type": "NarrativeText" - }, - { - "element_id": "13a0506c11e4b168cbe43eefa1cfd8b3", - "metadata": { + "page_number": 3, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "13a0506c11e4b168cbe43eefa1cfd8b3", + "text": "The reality today is that both global carbon dioxide emissions and fossil fuel use are still on the rise. 
This does not only make the battle against climate change much harder, but also results in hundreds of thousands of pollution deaths every year.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 3 - }, - "text": "The reality today is that both global carbon dioxide emissions and fossil fuel use are still on the rise. This does not only make the battle against climate change much harder, but also results in hundreds of thousands of pollution deaths every year.", - "type": "NarrativeText" - }, - { - "element_id": "d6f5f00c744da6f05c5a6019af118084", - "metadata": { + "page_number": 3, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "d6f5f00c744da6f05c5a6019af118084", + "text": "Energy is the essential agent for promoting human development, and global demand is projected to increase significantly in the coming decades. Securing access to modern and affordable energy is essential for lifting people out of poverty, and for promoting energy independence and economic growth.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 3 - }, - "text": "Energy is the essential agent for promoting human development, and global demand is projected to increase significantly in the coming decades. 
Securing access to modern and affordable energy is essential for lifting people out of poverty, and for promoting energy independence and economic growth.", - "type": "NarrativeText" - }, - { - "element_id": "cc18912a4626d26c5aec9cc160b35508", - "metadata": { + "page_number": 3, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "cc18912a4626d26c5aec9cc160b35508", + "text": "Nuclear energy is a proven solution with a long and well-established track record. Nuclear reactors \u2013 a grand total of 445 in 30 countries \u2013 are the low-carbon backbone of electricity systems, operating in the background, day in and day out, often out of sight and out of mind. Capable of generating immense amounts of clean power, they are the silent giants upon which we rely daily.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 3 - }, - "text": "Nuclear energy is a proven solution with a long and well-established track record. Nuclear reactors – a grand total of 445 in 30 countries – are the low-carbon backbone of electricity systems, operating in the background, day in and day out, often out of sight and out of mind. 
Capable of generating immense amounts of clean power, they are the silent giants upon which we rely daily.", - "type": "NarrativeText" - }, - { - "element_id": "d832caea40445d014c63847057712dd9", - "metadata": { + "page_number": 3, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "d832caea40445d014c63847057712dd9", + "text": "Nuclear energy has shown \u2013 be it in France or Sweden \u2013 that it has the potential to be the catalyst for delivering sustainable energy transitions, long before climate change was on the agenda. The use of nuclear energy is the fast track to a high-powered and clean energy system, which not only delivers a healthier environment and an affordable supply of electricity, but also strengthens energy security and helps mitigate climate change.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 3 - }, - "text": "Nuclear energy has shown – be it in France or Sweden – that it has the potential to be the catalyst for delivering sustainable energy transitions, long before climate change was on the agenda. 
The use of nuclear energy is the fast track to a high-powered and clean energy system, which not only delivers a healthier environment and an affordable supply of electricity, but also strengthens energy security and helps mitigate climate change.", - "type": "NarrativeText" - }, - { - "element_id": "cb21e65d859f5353e63ba5e58ee110cd", - "metadata": { + "page_number": 3, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "cb21e65d859f5353e63ba5e58ee110cd", + "text": "The global nuclear industry, led by World Nuclear Association, is ready to take on the challenge. As part of the Harmony Programme, we have set a target to build an additional 1000GWe of reactors across the world before 2050, bringing the global share of electricity production of nuclear to 25%.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 3 - }, - "text": "The global nuclear industry, led by World Nuclear Association, is ready to take on the challenge. 
As part of the Harmony Programme, we have set a target to build an additional 1000GWe of reactors across the world before 2050, bringing the global share of electricity production of nuclear to 25%.", - "type": "NarrativeText" - }, - { - "element_id": "b309f2b080b879e4757fd9987176fcda", - "metadata": { + "page_number": 3, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "b309f2b080b879e4757fd9987176fcda", + "text": "In order to realise the full potential of nuclear energy we have identified three key areas where actions are required:", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 3 - }, - "text": "In order to realise the full potential of nuclear energy we have identified three key areas where actions are required:", - "type": "NarrativeText" - }, - { - "element_id": "639154b6720aa40ea33368e0a6ff4f69", - "metadata": { + "page_number": 3, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, 
+ "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "ListItem", + "element_id": "639154b6720aa40ea33368e0a6ff4f69", + "text": "\u2022 The need to create a level playing field that values reliability and energy security", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 3 - }, - "text": "• The need to create a level playing field that values reliability and energy security", - "type": "ListItem" - }, - { - "element_id": "66b1d9ff77839d9f720a1661ea86c7da", - "metadata": { + "page_number": 3, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "ListItem", + "element_id": "66b1d9ff77839d9f720a1661ea86c7da", + "text": "\u2022 The need for harmony in the nuclear regulatory environment", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 3 - }, - "text": "• The need for harmony in the nuclear regulatory environment", - "type": "ListItem" - }, - { - "element_id": "a54680b2355d72199e09e14f9febd214", - "metadata": { + "page_number": 3, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - 
"url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "ListItem", + "element_id": "a54680b2355d72199e09e14f9febd214", + "text": "\u2022 The need for a holistic safety paradigm for the whole electricity system.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 3 - }, - "text": "• The need for a holistic safety paradigm for the whole electricity system.", - "type": "ListItem" - }, - { - "element_id": "a23cef34acc8302738b626ea2d398c1d", - "metadata": { + "page_number": 3, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "Image", + "element_id": "a23cef34acc8302738b626ea2d398c1d", + "text": "", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 3 - }, - "text": "", - "type": "Image" - }, - { - "element_id": "7e1e96312bb39326c5fd3c7e891ce643", - "metadata": { + "page_number": 3, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": 
"s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "UncategorizedText", + "element_id": "7e1e96312bb39326c5fd3c7e891ce643", + "text": "1", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 3 - }, - "text": "1", - "type": "UncategorizedText" - }, - { - "element_id": "dc3c4d9a725b0ead89311bb08bd251ae", - "metadata": { + "page_number": 3, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "Header", + "element_id": "dc3c4d9a725b0ead89311bb08bd251ae", + "text": "2", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 4 - }, - "text": "2", - "type": "Header" - }, - { - "element_id": "107793a98ed713604c924115b1353a5d", - "metadata": { + "page_number": 4, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": 
"1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "Title", + "element_id": "107793a98ed713604c924115b1353a5d", + "text": "The drivers for a clean energy system", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 4 - }, - "text": "The drivers for a clean energy system", - "type": "Title" - }, - { - "element_id": "38ff5b395d02c9be0a73a2bb9d38573b", - "metadata": { + "page_number": 4, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "38ff5b395d02c9be0a73a2bb9d38573b", + "text": "Electricity is central to modern life \u2013 it powers our daily lives, as well as our dreams and ambitions. Demand has grown steadily for more than 100 years, and will continue to do so as many parts of the world continue to develop, and electrification takes a central role in efforts to decarbonize (Figure 1). With nearly a billion people around the world still living in the dark, without access to electricity, humanity has a responsibility to learn from the past - everyone has the right to enjoy a modern lifestyle in a way that does not cause harm to people or the planet.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 4 - }, - "text": "Electricity is central to modern life – it powers our daily lives, as well as our dreams and ambitions. 
Demand has grown steadily for more than 100 years, and will continue to do so as many parts of the world continue to develop, and electrification takes a central role in efforts to decarbonize (Figure 1). With nearly a billion people around the world still living in the dark, without access to electricity, humanity has a responsibility to learn from the past - everyone has the right to enjoy a modern lifestyle in a way that does not cause harm to people or the planet.", - "type": "NarrativeText" - }, - { - "element_id": "d91a006a6f910ff9906f5fb86b48862b", - "metadata": { + "page_number": 4, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "Image", + "element_id": "d91a006a6f910ff9906f5fb86b48862b", + "text": "45,000 Marine 40,000 CSP 35,000 Solar PV Geothermal 30,000 Wind 25,000 Bioenergy 20,000 Hydro Nuclear 15,000 Gas 10,000 Oil 5,000 Coal 0 2000 2010 2020 2030 2040 ", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 4 - }, - "text": "45,000 Marine 40,000 CSP 35,000 Solar PV Geothermal 30,000 Wind 25,000 Bioenergy 20,000 Hydro Nuclear 15,000 Gas 10,000 Oil 5,000 Coal 0 2000 2010 2020 2030 2040 ", - "type": "Image" - }, - { - "element_id": "33098f034d26b209b3e70be26465960f", - "metadata": { + "page_number": 4, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": 
"8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "Title", + "element_id": "33098f034d26b209b3e70be26465960f", + "text": "h W T", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 4 - }, - "text": "h W T", - "type": "Title" - }, - { - "element_id": "01f202d61754b07afdbc86562947a713", - "metadata": { + "page_number": 4, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "FigureCaption", + "element_id": "01f202d61754b07afdbc86562947a713", + "text": "Figure 1. IEA projected electricity production and sources to 2040 i", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 4 - }, - "text": "Figure 1. 
IEA projected electricity production and sources to 2040 i", - "type": "FigureCaption" - }, - { - "element_id": "b4a845da7b0af6bf15d5fb9660b35338", - "metadata": { + "page_number": 4, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "b4a845da7b0af6bf15d5fb9660b35338", + "text": "The challenge before us, however, goes far beyond just electricity \u2013 we will need to find ways to decarbonize all parts of the economy, and we need solutions that are sustainable in the long-term. That means changing the way we heat our homes and power our industrial processes, as well as ensuring that the way we travel, export our products and ship our food moves away from fossil fuels.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 4 - }, - "text": "The challenge before us, however, goes far beyond just electricity – we will need to find ways to decarbonize all parts of the economy, and we need solutions that are sustainable in the long-term. 
That means changing the way we heat our homes and power our industrial processes, as well as ensuring that the way we travel, export our products and ship our food moves away from fossil fuels.", - "type": "NarrativeText" - }, - { - "element_id": "034833d2c3067bb56f6083bed72f01e5", - "metadata": { + "page_number": 4, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "034833d2c3067bb56f6083bed72f01e5", + "text": "Despite the very considerable efforts to decarbonize the economy and the countless billions spent, our world remains heavily addicted to fossil fuels. The trend is clear \u2013 instead of reducing our dependence on fossil fuels, we are increasing it (Figure 2). As a direct result, greenhouse gas emissions continue to rise when they need to drastically fall.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 4 - }, - "text": "Despite the very considerable efforts to decarbonize the economy and the countless billions spent, our world remains heavily addicted to fossil fuels. The trend is clear – instead of reducing our dependence on fossil fuels, we are increasing it (Figure 2). 
As a direct result, greenhouse gas emissions continue to rise when they need to drastically fall.", - "type": "NarrativeText" - }, - { - "element_id": "471efbe23e80b54343222bebe9140db9", - "metadata": { + "page_number": 4, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "Title", + "element_id": "471efbe23e80b54343222bebe9140db9", + "text": "h W G", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 4 - }, - "text": "h W G", - "type": "Title" - }, - { - "element_id": "6145856d34f52d6758bdb7b21375c456", - "metadata": { + "page_number": 4, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "Image", + "element_id": "6145856d34f52d6758bdb7b21375c456", + "text": "30,000,000 High-carbon Low-carbon 25,000,000 20,000,000 15,000,000 10,000,000 5,000,000 0 1990 1995 2000 2005 2010 2015 ", + "metadata": { "filetype": "application/pdf", 
"languages": [ "eng" ], - "page_number": 4 - }, - "text": "30,000,000 High-carbon Low-carbon 25,000,000 20,000,000 15,000,000 10,000,000 5,000,000 0 1990 1995 2000 2005 2010 2015 ", - "type": "Image" - }, - { - "element_id": "f078972fc2928675f735e4d249863c0d", - "metadata": { + "page_number": 4, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "FigureCaption", + "element_id": "f078972fc2928675f735e4d249863c0d", + "text": "Figure 2. Worldwide electricity generation by fuel (1990-2016)ii", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 4 - }, - "text": "Figure 2. 
Worldwide electricity generation by fuel (1990-2016)ii", - "type": "FigureCaption" - }, - { - "element_id": "bfd27a5d99fab08b146fa729a46eb575", - "metadata": { + "page_number": 4, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "bfd27a5d99fab08b146fa729a46eb575", + "text": "We need to deliver a worldwide transformation that is socially, economically and environmentally sustainable. We need a system that is affordable \u2013 no one should have to choose between heating their home, and essentials like eating \u2013 as well as helping to alleviate poverty, and ensure the realization of human potential globally. We need a power source that can not only help us mitigate the effects of climate change and environmental degradation, but can also help bring the enormous benefits of reliable electricity supply to the corners of the world that do not have access to it.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 5 - }, - "text": "We need to deliver a worldwide transformation that is socially, economically and environmentally sustainable. We need a system that is affordable – no one should have to choose between heating their home, and essentials like eating – as well as helping to alleviate poverty, and ensure the realization of human potential globally. 
We need a power source that can not only help us mitigate the effects of climate change and environmental degradation, but can also help bring the enormous benefits of reliable electricity supply to the corners of the world that do not have access to it.", - "type": "NarrativeText" - }, - { - "element_id": "7d04fe32568029462cb22a1d00c986f6", - "metadata": { + "page_number": 5, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "7d04fe32568029462cb22a1d00c986f6", + "text": "Nuclear energy is already making a major contribution. By using nuclear energy rather than fossil fuels, we currently avoid the emission of more than 2500 million tonnes of carbon dioxide every year. To put that into perspective, it is the equivalent of removing about 400 million cars from the world\u2019s roads.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 5 - }, - "text": "Nuclear energy is already making a major contribution. By using nuclear energy rather than fossil fuels, we currently avoid the emission of more than 2500 million tonnes of carbon dioxide every year. 
To put that into perspective, it is the equivalent of removing about 400 million cars from the world’s roads.", - "type": "NarrativeText" - }, - { - "element_id": "74fede67ccf426e4acb08ec09aef38de", - "metadata": { + "page_number": 5, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "Image", + "element_id": "74fede67ccf426e4acb08ec09aef38de", + "text": "", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 5 - }, - "text": "", - "type": "Image" - }, - { - "element_id": "decd1a58dda4f427da1a4b5468337852", - "metadata": { + "page_number": 5, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "decd1a58dda4f427da1a4b5468337852", + "text": "Modern society is dependent on the steady supply of electricity, every day of the year \u2013 regardless of weather, season or time of day \u2013 and nuclear energy is 
particularly well-suited to providing this service. Given that the majority of baseload supply is fossil-based, an increase in the use of nuclear energy would result in a rapid decarbonization of the electricity system. The International Energy Agency\u2019s (IEA) recent report III on nuclear energy highlighted the importance of dependable baseload electricity generators and the need to properly value and compensate them for the electricity security and reliability services they provide.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 5 - }, - "text": "Modern society is dependent on the steady supply of electricity, every day of the year – regardless of weather, season or time of day – and nuclear energy is particularly well-suited to providing this service. Given that the majority of baseload supply is fossil-based, an increase in the use of nuclear energy would result in a rapid decarbonization of the electricity system. The International Energy Agency’s (IEA) recent report III on nuclear energy highlighted the importance of dependable baseload electricity generators and the need to properly value and compensate them for the electricity security and reliability services they provide.", - "type": "NarrativeText" - }, - { - "element_id": "ef9cc2d4562f48ab8ef202bce5b108c0", - "metadata": { + "page_number": 5, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "Image", + 
"element_id": "ef9cc2d4562f48ab8ef202bce5b108c0", + "text": "", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 5 - }, - "text": "", - "type": "Image" - }, - { - "element_id": "ca9838a7dfb6d5d996f1ab6acd7642af", - "metadata": { + "page_number": 5, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "Image", + "element_id": "ca9838a7dfb6d5d996f1ab6acd7642af", + "text": "", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 5 - }, - "text": "", - "type": "Image" - }, - { - "element_id": "c65e2845b99488cbce5ee718c2bd054a", - "metadata": { + "page_number": 5, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "Image", + "element_id": "c65e2845b99488cbce5ee718c2bd054a", + "text": "", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 5 - }, - "text": "", - "type": 
"Image" - }, - { - "element_id": "812d182abc51f7c24b14024f28bdc0b7", - "metadata": { + "page_number": 5, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "Footer", + "element_id": "812d182abc51f7c24b14024f28bdc0b7", + "text": "3", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 5 - }, - "text": "3", - "type": "Footer" - }, - { - "element_id": "3ef3ab436a1c66d6c7aa00cdcfc40873", - "metadata": { + "page_number": 5, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "Header", + "element_id": "3ef3ab436a1c66d6c7aa00cdcfc40873", + "text": "4", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 6 - }, - "text": "4", - "type": "Header" - }, - { - "element_id": "c73abd40dd196cc77bb03f2567f46b03", - "metadata": { + "page_number": 6, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + 
"url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "c73abd40dd196cc77bb03f2567f46b03", + "text": "Despite impressive recent growth, the stark reality is that renewables alone will not be able to resolve our dependence on fossil fuels. Clearly, the sun does not always shine, and the wind does not always blow, and this is compounded by the fact that many times these periods coincide with when electricity demand is at its highest, but renewables can be complementary to nuclear energy. Storage solutions, such as batteries, will not be able to power our societies for days or weeks when the weather is not favourable. Natural gas is currently the most used solution for the intermittency problem, which only serves to reinforce our economy\u2019s dependence of fossil fuels, and severely undermines the apparently \u2018green credentials\u2019 of many renewables.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 6 - }, - "text": "Despite impressive recent growth, the stark reality is that renewables alone will not be able to resolve our dependence on fossil fuels. Clearly, the sun does not always shine, and the wind does not always blow, and this is compounded by the fact that many times these periods coincide with when electricity demand is at its highest, but renewables can be complementary to nuclear energy. 
Storage solutions, such as batteries, will not be able to power our societies for days or weeks when the weather is not favourable. Natural gas is currently the most used solution for the intermittency problem, which only serves to reinforce our economy’s dependence of fossil fuels, and severely undermines the apparently ‘green credentials’ of many renewables.", - "type": "NarrativeText" - }, - { - "element_id": "f7078a4205d6d8b21783483c5b426c02", - "metadata": { + "page_number": 6, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "Title", + "element_id": "f7078a4205d6d8b21783483c5b426c02", + "text": "Moving to a sustainable future", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 6 - }, - "text": "Moving to a sustainable future", - "type": "Title" - }, - { - "element_id": "62b6b9ec7e962db41cf8818f42b2bc4d", - "metadata": { + "page_number": 6, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": 
"1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "62b6b9ec7e962db41cf8818f42b2bc4d", + "text": "The Intergovernmental Panel on Climate Change (IPCC) special report on Global Warming of 1.5\u00b0C iv examined a large number of different scenarios for limiting global warming to 1.5\u00b0C. Of those scenarios which would achieve the 1.5\u00b0C target, the mean increase in nuclear energy\u2019s contribution to electricity production was 2.5 times higher compared to today. However, the \u2018middle-of-the-road\u2019 scenario \u2013 in which social, economic, and technological trends follow current patterns and would not require major changes to, for example, diet and travel habits \u2013 sees the need for nuclear increase by five times globally by 2050.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 6 - }, - "text": "The Intergovernmental Panel on Climate Change (IPCC) special report on Global Warming of 1.5°C iv examined a large number of different scenarios for limiting global warming to 1.5°C. Of those scenarios which would achieve the 1.5°C target, the mean increase in nuclear energy’s contribution to electricity production was 2.5 times higher compared to today. 
However, the ‘middle-of-the-road’ scenario – in which social, economic, and technological trends follow current patterns and would not require major changes to, for example, diet and travel habits – sees the need for nuclear increase by five times globally by 2050.", - "type": "NarrativeText" - }, - { - "element_id": "2673340156781d2b4e8c53921e661b1d", - "metadata": { + "page_number": 6, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "2673340156781d2b4e8c53921e661b1d", + "text": "The IEA has concluded that without an expanded contribution from nuclear energy, the already huge challenge of achieving emissions reductions will become drastically harder and more costly. In their latest report on nuclear energy v, published in 2019, they also conclude that not using nuclear would have negative implications for energy security and result in higher costs for the consumers. 
The IEA recommends policy reforms to \u2018\u2026 ensure competition on a level playing field\u2019 and that the \u2018\u2026 focus should be on designing electricity markets in a way that values the clean energy and energy security attributes of low-carbon technologies, including nuclear power.\u2019 Such reforms should also ensure that reliability of electricity production is properly valued and compensated.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 6 - }, - "text": "The IEA has concluded that without an expanded contribution from nuclear energy, the already huge challenge of achieving emissions reductions will become drastically harder and more costly. In their latest report on nuclear energy v, published in 2019, they also conclude that not using nuclear would have negative implications for energy security and result in higher costs for the consumers. The IEA recommends policy reforms to ‘… ensure competition on a level playing field’ and that the ‘… focus should be on designing electricity markets in a way that values the clean energy and energy security attributes of low-carbon technologies, including nuclear power.’ Such reforms should also ensure that reliability of electricity production is properly valued and compensated.", - "type": "NarrativeText" - }, - { - "element_id": "8f9b23ea4d2d725ef37c7766f75d8b5d", - "metadata": { + "page_number": 6, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": 
"1676196636.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "8f9b23ea4d2d725ef37c7766f75d8b5d", + "text": "As part of the Harmony Programme, the world\u2019s nuclear industry has identified three key policy areas for action to unlock the true potential of nuclear energy - the need for a level playing field, the harmonization of regulations and the establishment of an effective safety paradigm.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 6 - }, - "text": "As part of the Harmony Programme, the world’s nuclear industry has identified three key policy areas for action to unlock the true potential of nuclear energy - the need for a level playing field, the harmonization of regulations and the establishment of an effective safety paradigm.", - "type": "NarrativeText" - }, - { - "element_id": "a24ec4b8d4de88ccfcb84af80a57d36d", - "metadata": { + "page_number": 6, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "a24ec4b8d4de88ccfcb84af80a57d36d", + "text": "In regard to the need for a level playing field, we see that many of the world\u2019s electricity markets operate in an unsustainable fashion, dominated by short-term thinking. 
Electricity supply which is affordable, reliable and available 24/7 generates broad societal benefits, and as seen in Figure 3, nuclear is one of the most affordable electricity sources.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 6 - }, - "text": "In regard to the need for a level playing field, we see that many of the world’s electricity markets operate in an unsustainable fashion, dominated by short-term thinking. Electricity supply which is affordable, reliable and available 24/7 generates broad societal benefits, and as seen in Figure 3, nuclear is one of the most affordable electricity sources.", - "type": "NarrativeText" - }, - { - "element_id": "4178b04e063bf5bc18e18b6c4ce1fa58", - "metadata": { + "page_number": 6, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "Image", + "element_id": "4178b04e063bf5bc18e18b6c4ce1fa58", + "text": "", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 6 - }, - "text": "", - "type": "Image" - }, - { - "element_id": "792f93fd194ae2fea77fd5a477bb1d5e", - "metadata": { + "page_number": 6, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + 
"remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "Title", + "element_id": "792f93fd194ae2fea77fd5a477bb1d5e", + "text": "h W M / $", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 7 - }, - "text": "h W M / $", - "type": "Title" - }, - { - "element_id": "9cad28e919656850c215a73aea60024c", - "metadata": { + "page_number": 7, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "Image", + "element_id": "9cad28e919656850c215a73aea60024c", + "text": "300 250 200 150 100 50 0 m ercial Photovoltaic C o m O nshore Wind Offshore Wind N uclear C C G T C oal ", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 7 - }, - "text": "300 250 200 150 100 50 0 m ercial Photovoltaic C o m O nshore Wind Offshore Wind N uclear C C G T C oal ", - "type": "Image" - }, - { - "element_id": "2821726ab54800008a4692eb9f89171f", - "metadata": { + "page_number": 7, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": 
"utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "FigureCaption", + "element_id": "2821726ab54800008a4692eb9f89171f", + "text": "Figure 3. Comparative cost projections for main electricity generators vi", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 7 - }, - "text": "Figure 3. Comparative cost projections for main electricity generators vi", - "type": "FigureCaption" - }, - { - "element_id": "3d5cde2213eaec20cc2c5c77e68354cb", - "metadata": { + "page_number": 7, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "3d5cde2213eaec20cc2c5c77e68354cb", + "text": "However, markets fail to give due credit to electricity generators, such as nuclear energy, that are able to meet these societal demands. This has resulted in situations where nuclear energy has struggled to compete with energy sources that have been subsidized, do not pay the hidden costs brought on by their intermittency (e.g. 
costly backup provisions and investments in the grid), or do not have to take responsibility for using our common atmosphere as a dumping ground.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 7 - }, - "text": "However, markets fail to give due credit to electricity generators, such as nuclear energy, that are able to meet these societal demands. This has resulted in situations where nuclear energy has struggled to compete with energy sources that have been subsidized, do not pay the hidden costs brought on by their intermittency (e.g. costly backup provisions and investments in the grid), or do not have to take responsibility for using our common atmosphere as a dumping ground.", - "type": "NarrativeText" - }, - { - "element_id": "e37ee5f00fe8522280ab816ef335e148", - "metadata": { + "page_number": 7, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "e37ee5f00fe8522280ab816ef335e148", + "text": "Additionally, electricity markets fail to recognize the relative costs of different forms of electricity generation. Whilst the nuclear industry takes responsibility for its lifecycle costs (including decommissioning and waste management), other electricity generators do not. 
Fossil fuel generators are rarely required to pay the price in line with the environmental and health damage that their emissions cause, whilst the cost of wind and solar does not include the disposal of the sometimes toxic materials at the end of their life.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 7 - }, - "text": "Additionally, electricity markets fail to recognize the relative costs of different forms of electricity generation. Whilst the nuclear industry takes responsibility for its lifecycle costs (including decommissioning and waste management), other electricity generators do not. Fossil fuel generators are rarely required to pay the price in line with the environmental and health damage that their emissions cause, whilst the cost of wind and solar does not include the disposal of the sometimes toxic materials at the end of their life.", - "type": "NarrativeText" - }, - { - "element_id": "f26c420afe7376f1d600ec5c54a51c46", - "metadata": { + "page_number": 7, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "f26c420afe7376f1d600ec5c54a51c46", + "text": "In regard to the need to harmonize regulations, multiple regulatory barriers stemming from diverse national licensing processes and safety requirements currently limit global nuclear trade and investment. 
A lack of international standardization places unnecessary regulatory burdens on nuclear activities and causes delays in the licensing of new designs, hindering innovation.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 7 - }, - "text": "In regard to the need to harmonize regulations, multiple regulatory barriers stemming from diverse national licensing processes and safety requirements currently limit global nuclear trade and investment. A lack of international standardization places unnecessary regulatory burdens on nuclear activities and causes delays in the licensing of new designs, hindering innovation.", - "type": "NarrativeText" - }, - { - "element_id": "0bd2429b9134c65e8e70f0763ba328c7", - "metadata": { + "page_number": 7, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "0bd2429b9134c65e8e70f0763ba328c7", + "text": "The International Atomic Energy Agency (IAEA) has highlighted the importance of addressing this issue, concluding that the lack of regulatory harmony \u2018\u2026causes many drawbacks for the entire nuclear industry, including developers, vendors, operators and even regulators themselves\u2026This results in increased costs and reduced predictability in project execution\u2019. 
vii It is therefore crucial that we harmonize the regulatory process to address these weaknesses, and avoid unnecessary duplication and inconsistencies.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 7 - }, - "text": "The International Atomic Energy Agency (IAEA) has highlighted the importance of addressing this issue, concluding that the lack of regulatory harmony ‘…causes many drawbacks for the entire nuclear industry, including developers, vendors, operators and even regulators themselves…This results in increased costs and reduced predictability in project execution’. vii It is therefore crucial that we harmonize the regulatory process to address these weaknesses, and avoid unnecessary duplication and inconsistencies.", - "type": "NarrativeText" - }, - { - "element_id": "b6e8daf0bbaa32c5f37ac261b325affb", - "metadata": { + "page_number": 7, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "Image", + "element_id": "b6e8daf0bbaa32c5f37ac261b325affb", + "text": "", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 7 - }, - "text": "", - "type": "Image" - }, - { - "element_id": "923523789c0e08c68514bd56bdff607e", - "metadata": { + "page_number": 7, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", 
"record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "Footer", + "element_id": "923523789c0e08c68514bd56bdff607e", + "text": "5", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 7 - }, - "text": "5", - "type": "Footer" - }, - { - "element_id": "eda2d3adf2e4a1f9064252ed95826bf6", - "metadata": { + "page_number": 7, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "Header", + "element_id": "eda2d3adf2e4a1f9064252ed95826bf6", + "text": "6", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 8 - }, - "text": "6", - "type": "Header" - }, - { - "element_id": "2ee71305ea2aeb159dce93675cb959d8", - "metadata": { + "page_number": 8, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": 
"s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "2ee71305ea2aeb159dce93675cb959d8", + "text": "In regard to the need for a holistic safety paradigm for the whole electricity system, we need to consider safety from a societal perspective, something the current energy system fails to do. The health, environmental and safety benefits of nuclear energy are not sufficiently understood and valued when compared with other electricity sources. Nuclear energy remains the safest form of electricity generation (Figure 4). Additionally, the use of nuclear consistently prevents many tens of thousands of deaths (mainly resulting from air pollution) every year by avoiding the use of coal - lifesaving measures which must be better recognised and valued.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 8 - }, - "text": "In regard to the need for a holistic safety paradigm for the whole electricity system, we need to consider safety from a societal perspective, something the current energy system fails to do. The health, environmental and safety benefits of nuclear energy are not sufficiently understood and valued when compared with other electricity sources. Nuclear energy remains the safest form of electricity generation (Figure 4). 
Additionally, the use of nuclear consistently prevents many tens of thousands of deaths (mainly resulting from air pollution) every year by avoiding the use of coal - lifesaving measures which must be better recognised and valued.", - "type": "NarrativeText" - }, - { - "element_id": "b0f97a1e3e431fa2d186e2e8fde4dc18", - "metadata": { + "page_number": 8, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "Image", + "element_id": "b0f97a1e3e431fa2d186e2e8fde4dc18", + "text": "140 e 120 100 120 99.5 80 60 71.9 40 20 0 C oal Oil N atural gas 8.5 1.78 Offshore wind O nshore wind (G erm any) (U K) 0.245 S olar P V <0.01 N uclear* ", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 8 - }, - "text": "140 e 120 100 120 99.5 80 60 71.9 40 20 0 C oal Oil N atural gas 8.5 1.78 Offshore wind O nshore wind (G erm any) (U K) 0.245 S olar P V <0.01 N uclear* ", - "type": "Image" - }, - { - "element_id": "ff300ef7a936a82104bf89668428c28c", - "metadata": { + "page_number": 8, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": 
"s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "ff300ef7a936a82104bf89668428c28c", + "text": "r a e y", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 8 - }, - "text": "r a e y", - "type": "NarrativeText" - }, - { - "element_id": "f6ad5695f9388190089a73994c4afbf3", - "metadata": { + "page_number": 8, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "Title", + "element_id": "f6ad5695f9388190089a73994c4afbf3", + "text": "W T", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 8 - }, - "text": "W T", - "type": "Title" - }, - { - "element_id": "54f6293185bdb444eb1802ba80bee640", - "metadata": { + "page_number": 8, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": 
"1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "54f6293185bdb444eb1802ba80bee640", + "text": "r e p s e i t i l", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 8 - }, - "text": "r e p s e i t i l", - "type": "NarrativeText" - }, - { - "element_id": "a98bee8174850612900731525ab6659a", - "metadata": { + "page_number": 8, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "Title", + "element_id": "a98bee8174850612900731525ab6659a", + "text": "a t a F", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 8 - }, - "text": "a t a F", - "type": "Title" - }, - { - "element_id": "15cd2f288bebfc1813d77e0aa050a0f8", - "metadata": { + "page_number": 8, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "FigureCaption", + "element_id": 
"15cd2f288bebfc1813d77e0aa050a0f8", + "text": "Figure 4. Comparison of number of fatalities due to electricity generation viii", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 8 - }, - "text": "Figure 4. Comparison of number of fatalities due to electricity generation viii", - "type": "FigureCaption" - }, - { - "element_id": "33b46a3854e000ba41f08b7ef320f2f8", - "metadata": { + "page_number": 8, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "Title", + "element_id": "33b46a3854e000ba41f08b7ef320f2f8", + "text": "Nuclear for a sustainable tomorrow", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 8 - }, - "text": "Nuclear for a sustainable tomorrow", - "type": "Title" - }, - { - "element_id": "bd7211e18d89695df6a6de6a483df434", - "metadata": { + "page_number": 8, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + 
} + } + }, + { + "type": "NarrativeText", + "element_id": "bd7211e18d89695df6a6de6a483df434", + "text": "Nuclear energy is already making a significant contribution to providing the world with clean and abundant electricity, and has a proven track record of being a reliable workhorse around the world. Countries like France, Sweden and Switzerland have proven that it is possible to divorce economic growth from an increase in damaging emissions and over the timescales required to effectively challenge climate change and environmental degradation (Figures 5 and 6). Nuclear can ensure that fast-growing populations achieve rising standards of living \u2013 without having to sacrifice the planet or their own well-being.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 8 - }, - "text": "Nuclear energy is already making a significant contribution to providing the world with clean and abundant electricity, and has a proven track record of being a reliable workhorse around the world. Countries like France, Sweden and Switzerland have proven that it is possible to divorce economic growth from an increase in damaging emissions and over the timescales required to effectively challenge climate change and environmental degradation (Figures 5 and 6). 
Nuclear can ensure that fast-growing populations achieve rising standards of living – without having to sacrifice the planet or their own well-being.", - "type": "NarrativeText" - }, - { - "element_id": "89efb2f48dddb7494311db2e7042ee35", - "metadata": { + "page_number": 8, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "Image", + "element_id": "89efb2f48dddb7494311db2e7042ee35", + "text": "100 90 Coal Gas/Oil 80 Biofuels/Waste 70 Wind/Solar 60 Hydro Nuclear 50 40 30 20 10 0 ", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 8 - }, - "text": "100 90 Coal Gas/Oil 80 Biofuels/Waste 70 Wind/Solar 60 Hydro Nuclear 50 40 30 20 10 0 ", - "type": "Image" - }, - { - "element_id": "c591dd1b7ac1725959db78546ef69f71", - "metadata": { + "page_number": 8, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "UncategorizedText", + 
"element_id": "c591dd1b7ac1725959db78546ef69f71", + "text": "%", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 8 - }, - "text": "%", - "type": "UncategorizedText" - }, - { - "element_id": "dc980c7616ede261ebcd41fe78bda526", - "metadata": { + "page_number": 8, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "Title", + "element_id": "dc980c7616ede261ebcd41fe78bda526", + "text": "France", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 8 - }, - "text": "France", - "type": "Title" - }, - { - "element_id": "90bae966dbf38298ae084516c468e916", - "metadata": { + "page_number": 8, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "Title", + "element_id": "90bae966dbf38298ae084516c468e916", + "text": "Sweden", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 
8 - }, - "text": "Sweden", - "type": "Title" - }, - { - "element_id": "29a073bcccb94157b4ae3f71ff1f88ea", - "metadata": { + "page_number": 8, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "Title", + "element_id": "29a073bcccb94157b4ae3f71ff1f88ea", + "text": "Switzerland", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 8 - }, - "text": "Switzerland", - "type": "Title" - }, - { - "element_id": "f2db72028ec4f864ab56fdc75cd85e35", - "metadata": { + "page_number": 8, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "FigureCaption", + "element_id": "f2db72028ec4f864ab56fdc75cd85e35", + "text": "Figure 5. The importance of nuclear in ensuring clean energy systems in France, Sweden and Switzerland ix", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 8 - }, - "text": "Figure 5. 
The importance of nuclear in ensuring clean energy systems in France, Sweden and Switzerland ix", - "type": "FigureCaption" - }, - { - "element_id": "808a4229fc57f6a2a480836225c053a0", - "metadata": { + "page_number": 8, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "Image", + "element_id": "808a4229fc57f6a2a480836225c053a0", + "text": "600 500 Non-hydro ren. & waste 400 Nuclear Natural gas 300 Hydro 200 Oil Coal 100 0 ", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 9 - }, - "text": "600 500 Non-hydro ren. 
& waste 400 Nuclear Natural gas 300 Hydro 200 Oil Coal 100 0 ", - "type": "Image" - }, - { - "element_id": "d25c222637cda44434c666939ff6a267", - "metadata": { + "page_number": 9, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "Title", + "element_id": "d25c222637cda44434c666939ff6a267", + "text": "h W T", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 9 - }, - "text": "h W T", - "type": "Title" - }, - { - "element_id": "b2b7585faff336f0134b3b48464cf4c6", - "metadata": { + "page_number": 9, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "FigureCaption", + "element_id": "b2b7585faff336f0134b3b48464cf4c6", + "text": "1974 1980 1985 1990 1995 2000 2005 2010 2017", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 9 - }, - "text": "1974 1980 1985 1990 1995 2000 2005 2010 2017", - "type": 
"FigureCaption" - }, - { - "element_id": "f71ca4190bbe3083f902fcb238f6c22f", - "metadata": { + "page_number": 9, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "FigureCaption", + "element_id": "f71ca4190bbe3083f902fcb238f6c22f", + "text": "Figure 6. The lasting decarbonization of French electricity and nuclear\u2019s ability to meet growing demand x", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 9 - }, - "text": "Figure 6. 
The lasting decarbonization of French electricity and nuclear’s ability to meet growing demand x", - "type": "FigureCaption" - }, - { - "element_id": "04e02981e7fd3a4a7d8e5a064f0f5775", - "metadata": { + "page_number": 9, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "04e02981e7fd3a4a7d8e5a064f0f5775", + "text": "The incredible energy density of uranium means that just a few kilos is all that is required to provide one person with enough power for a lifetime. Uranium is abundant and can be found in many parts of the world, as well as in seawater. Furthermore, spent nuclear fuel is well managed and can in most cases be recycled to produce even more power. By using nuclear energy, countries are able to take charge of their own destinies by decreasing their reliance on imported energy \u2013 enhanced independence and security in uncertain times.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 9 - }, - "text": "The incredible energy density of uranium means that just a few kilos is all that is required to provide one person with enough power for a lifetime. Uranium is abundant and can be found in many parts of the world, as well as in seawater. Furthermore, spent nuclear fuel is well managed and can in most cases be recycled to produce even more power. 
By using nuclear energy, countries are able to take charge of their own destinies by decreasing their reliance on imported energy – enhanced independence and security in uncertain times.", - "type": "NarrativeText" - }, - { - "element_id": "f05002752f0e36d49670de3052a5cb7d", - "metadata": { + "page_number": 9, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "Image", + "element_id": "f05002752f0e36d49670de3052a5cb7d", + "text": "One fuel pellet contains as much energy as a tonne of coal ", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 9 - }, - "text": "One fuel pellet contains as much energy as a tonne of coal ", - "type": "Image" - }, - { - "element_id": "3689b3ec615ae8ea26856a7e326987ce", - "metadata": { + "page_number": 9, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": 
"3689b3ec615ae8ea26856a7e326987ce", + "text": "Unlike other power sources, nuclear energy helps us reduce our total footprint, going beyond just the environment. When accounting for factors such as cost (e.g. fuel and construction costs), carbon (lifecycle greenhouse gas emissions), water and land footprints, nuclear is far ahead of all other energy generators.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 9 - }, - "text": "Unlike other power sources, nuclear energy helps us reduce our total footprint, going beyond just the environment. When accounting for factors such as cost (e.g. fuel and construction costs), carbon (lifecycle greenhouse gas emissions), water and land footprints, nuclear is far ahead of all other energy generators.", - "type": "NarrativeText" - }, - { - "element_id": "4ae8fb0de2a9299285a47864f4e352d7", - "metadata": { + "page_number": 9, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "4ae8fb0de2a9299285a47864f4e352d7", + "text": "Nuclear energy offers a multitude of services beyond just electricity. With nuclear, we can decarbonize the way we heat our homes, provide process heat for industry, and ensure access to clean water. 
As freshwater supplies come under increasing pressure worldwide, nuclear reactors can provide desalination, ensuring a reliable flow of fresh drinking water in areas where it is scarce.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 9 - }, - "text": "Nuclear energy offers a multitude of services beyond just electricity. With nuclear, we can decarbonize the way we heat our homes, provide process heat for industry, and ensure access to clean water. As freshwater supplies come under increasing pressure worldwide, nuclear reactors can provide desalination, ensuring a reliable flow of fresh drinking water in areas where it is scarce.", - "type": "NarrativeText" - }, - { - "element_id": "d7bc33bc5c4eb3d83f8a56f968d1f232", - "metadata": { + "page_number": 9, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "Header", + "element_id": "d7bc33bc5c4eb3d83f8a56f968d1f232", + "text": "7", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 9 - }, - "text": "7", - "type": "Header" - }, - { - "element_id": "d484deba727a0cb854f93482219a24df", - "metadata": { + "page_number": 9, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": 
"utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "Header", + "element_id": "d484deba727a0cb854f93482219a24df", + "text": "8", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 10 - }, - "text": "8", - "type": "Header" - }, - { - "element_id": "f11771cc97b73e3ec86beb7752b20371", - "metadata": { + "page_number": 10, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "f11771cc97b73e3ec86beb7752b20371", + "text": "Nuclear energy can be relied upon to power the new mobility revolution taking place. Every day, we use almost 20 million barrels of oil to power our vehicles. By swapping to an electric or hydrogen-powered transport fleet \u2013 all powered by the atom \u2013 we are able to address one of the key challenges to a sustainable economy.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 10 - }, - "text": "Nuclear energy can be relied upon to power the new mobility revolution taking place. Every day, we use almost 20 million barrels of oil to power our vehicles. 
By swapping to an electric or hydrogen-powered transport fleet – all powered by the atom – we are able to address one of the key challenges to a sustainable economy.", - "type": "NarrativeText" + "page_number": 10, + "data_source": { + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", + "record_locator": { + "protocol": "s3", + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" + }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } }, { + "type": "NarrativeText", "element_id": "945c8c5aeca9696e2a3c0a9b2de0e1aa", + "text": "We cannot afford to wait \u2013 we need every part of the puzzle to contribute towards solving some of the greatest challenges faced by humankind in a very long time. The impacts of climate change will hit the poorest and most vulnerable first, and failing to act will have significant humanitarian consequences.", "metadata": { - "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 10 - }, - "text": "We cannot afford to wait – we need every part of the puzzle to contribute towards solving some of the greatest challenges faced by humankind in a very long time. 
The impacts of climate change will hit the poorest and most vulnerable first, and failing to act will have significant humanitarian consequences.", - "type": "NarrativeText" - }, - { - "element_id": "b79b5b8e937c1a714d3286c6d70881aa", - "metadata": { + "page_number": 10, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "b79b5b8e937c1a714d3286c6d70881aa", + "text": "Nuclear power is the silent giant of today\u2019s energy system \u2013 it runs quietly in the background, capable of delivering immense amounts of power, regardless of weather or season, allowing us to focus on everything else in life. It is a technology that is available now, and can be expanded quickly across the world to help us solve some of the most defining challenges we face. Nuclear energy holds the potential to herald a new, cleaner and truly sustainable world \u2013 enabling us to pass on a cleaner planet to our children.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 10 - }, - "text": "Nuclear power is the silent giant of today’s energy system – it runs quietly in the background, capable of delivering immense amounts of power, regardless of weather or season, allowing us to focus on everything else in life. It is a technology that is available now, and can be expanded quickly across the world to help us solve some of the most defining challenges we face. 
Nuclear energy holds the potential to herald a new, cleaner and truly sustainable world – enabling us to pass on a cleaner planet to our children.", - "type": "NarrativeText" - }, - { - "element_id": "936ab104c18b374610d851e57072f45e", - "metadata": { + "page_number": 10, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "Image", + "element_id": "936ab104c18b374610d851e57072f45e", + "text": "", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 10 - }, - "text": "", - "type": "Image" - }, - { - "element_id": "d88e18fce3a0a1e4f483fc03555b2dbd", - "metadata": { + "page_number": 10, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "Title", + "element_id": "d88e18fce3a0a1e4f483fc03555b2dbd", + "text": "References", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 10 - }, - "text": "References", - 
"type": "Title" - }, - { - "element_id": "97b38e1436971a086af6f1e3fa907126", - "metadata": { + "page_number": 10, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "Title", + "element_id": "97b38e1436971a086af6f1e3fa907126", + "text": "i", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 10 - }, - "text": "i", - "type": "Title" - }, - { - "element_id": "112b81b1155f39c21f5cc5ae789f2acd", - "metadata": { + "page_number": 10, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "Title", + "element_id": "112b81b1155f39c21f5cc5ae789f2acd", + "text": "ii", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 10 - }, - "text": "ii", - "type": "Title" - }, - { - "element_id": "25b3d0bfc74512aec787c31f5889ae32", - "metadata": { + "page_number": 10, "data_source": { - "date_modified": 
"2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "Title", + "element_id": "25b3d0bfc74512aec787c31f5889ae32", + "text": "iii", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 10 - }, - "text": "iii", - "type": "Title" - }, - { - "element_id": "7806e49f513bb91010cfa16fd65d1e27", - "metadata": { + "page_number": 10, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "Title", + "element_id": "7806e49f513bb91010cfa16fd65d1e27", + "text": "iv", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 10 - }, - "text": "iv", - "type": "Title" - }, - { - "element_id": "84643eaf522b55520d3286fc650b8565", - "metadata": { + "page_number": 10, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", 
"record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "Title", + "element_id": "84643eaf522b55520d3286fc650b8565", + "text": "Vv", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 10 - }, - "text": "Vv", - "type": "Title" - }, - { - "element_id": "65e28360f4a9960fd1a079d3bc38f5fa", - "metadata": { + "page_number": 10, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "Title", + "element_id": "65e28360f4a9960fd1a079d3bc38f5fa", + "text": "vi", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 10 - }, - "text": "vi", - "type": "Title" - }, - { - "element_id": "d925df783207d4d09021cab09a5f2799", - "metadata": { + "page_number": 10, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": 
"s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "Title", + "element_id": "d925df783207d4d09021cab09a5f2799", + "text": "vii", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 10 - }, - "text": "vii", - "type": "Title" - }, - { - "element_id": "efc2aec0f176f924eeba5e86eb6691ba", - "metadata": { + "page_number": 10, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "efc2aec0f176f924eeba5e86eb6691ba", + "text": "International Energy Agency (2018), World Energy Outlook 2018. Data accessed from https://www.iea.org/weo/ \u2013 Based on the New Policies Scenario, which incorporates existing energy policies as well as an assessment of the results likely to stem from the implementation of announced policy intentions \u2013 with visual modification by World Nuclear Association. International Energy Agency (n.d.), Statistics. Accessed from: https://www.iea.org/statistics/?country=WORLD&year=2016&category=Electricity&indicator=ElecGenByFuel&mode =chart&dataTable=ELECTRICITYANDHEAT \u2013 with visual modifications by World Nuclear Association. International Energy Agency (2019), Nuclear Power in a Clean Energy System. 
Accessed from: https://www.iea.org/ publications/nuclear/ Intergovernmental Panel on Climate Change (2018), Special Report on Global Warming of 1.5 \u00b0C. Accessed from: https://www.ipcc.ch/sr15/ International Energy Agency (2019), Nuclear Power in a Clean Energy System. Accessed from: https://www.iea.org/ publications/nuclear/ International Energy Agency & OECD Nuclear Energy Agency (2015), Projected Costs of generating Electricity \u2013 2015 Edition. Accessed from: https://www.oecd-nea.org/ndd/pubs/2015/7057-proj-costs-electricity-2015.pdf International Atomic Energy Agency (2015), Technical challenges in the application and licensing of digital instrumentation and control systems in nuclear power plants. Accessed from: https://www-pub.iaea.org/MTCD/ Publications/PDF/P1695_web.pdf", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 10 - }, - "text": "International Energy Agency (2018), World Energy Outlook 2018. Data accessed from https://www.iea.org/weo/ – Based on the New Policies Scenario, which incorporates existing energy policies as well as an assessment of the results likely to stem from the implementation of announced policy intentions – with visual modification by World Nuclear Association. International Energy Agency (n.d.), Statistics. Accessed from: https://www.iea.org/statistics/?country=WORLD&year=2016&category=Electricity&indicator=ElecGenByFuel&mode =chart&dataTable=ELECTRICITYANDHEAT – with visual modifications by World Nuclear Association. International Energy Agency (2019), Nuclear Power in a Clean Energy System. Accessed from: https://www.iea.org/ publications/nuclear/ Intergovernmental Panel on Climate Change (2018), Special Report on Global Warming of 1.5 °C. Accessed from: https://www.ipcc.ch/sr15/ International Energy Agency (2019), Nuclear Power in a Clean Energy System. 
Accessed from: https://www.iea.org/ publications/nuclear/ International Energy Agency & OECD Nuclear Energy Agency (2015), Projected Costs of generating Electricity – 2015 Edition. Accessed from: https://www.oecd-nea.org/ndd/pubs/2015/7057-proj-costs-electricity-2015.pdf International Atomic Energy Agency (2015), Technical challenges in the application and licensing of digital instrumentation and control systems in nuclear power plants. Accessed from: https://www-pub.iaea.org/MTCD/ Publications/PDF/P1695_web.pdf", - "type": "NarrativeText" - }, - { - "element_id": "01a35264bb99e41c06ee8164588c3c0d", - "metadata": { + "page_number": 10, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "ListItem", + "element_id": "01a35264bb99e41c06ee8164588c3c0d", + "text": "viii Paul-Scherrer Institute. Data for nuclear accidents modified to reflect UNSCEAR findings/recommendations (2012)", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 10 - }, - "text": "viii Paul-Scherrer Institute. 
Data for nuclear accidents modified to reflect UNSCEAR findings/recommendations (2012)", - "type": "ListItem" - }, - { - "element_id": "62ac61c2f315bde0967d8b87f5a8d22f", - "metadata": { + "page_number": 10, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "UncategorizedText", + "element_id": "62ac61c2f315bde0967d8b87f5a8d22f", + "text": "and NRC SOARCA study 2015 International Energy Agency (2018), Electricity Information 2018 https://webstore.iea.org/electricity-information-2018-overview Ibid.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 10 - }, - "text": "and NRC SOARCA study 2015 International Energy Agency (2018), Electricity Information 2018 https://webstore.iea.org/electricity-information-2018-overview Ibid.", - "type": "UncategorizedText" - }, - { - "element_id": "e1bd8d64a882d38da22d24986334d476", - "metadata": { + "page_number": 10, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": 
"1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "Title", + "element_id": "e1bd8d64a882d38da22d24986334d476", + "text": "ix", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 10 - }, - "text": "ix", - "type": "Title" - }, - { - "element_id": "ab8476cb57b2e0de5a27e919c27a88ee", - "metadata": { + "page_number": 10, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "ListItem", + "element_id": "ab8476cb57b2e0de5a27e919c27a88ee", + "text": "x", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 10 - }, - "text": "x", - "type": "ListItem" - }, - { - "element_id": "95f8be8e7552c5b5045246a1f7df0e0a", - "metadata": { + "page_number": 10, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "95f8be8e7552c5b5045246a1f7df0e0a", + "text": "Photo 
credits: Front cover: Mike Baird; page 2: Vattenfall; page 4: Getty Images; page 5: Adobe Stock; page 6: Rosatom; page 8: Dean Calma, IAEA; page 10: Kazatomprom; page 11: EDF.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 10 - }, - "text": "Photo credits: Front cover: Mike Baird; page 2: Vattenfall; page 4: Getty Images; page 5: Adobe Stock; page 6: Rosatom; page 8: Dean Calma, IAEA; page 10: Kazatomprom; page 11: EDF.", - "type": "NarrativeText" - }, - { - "element_id": "a839ef713de31c7a993e697b53db62f1", - "metadata": { + "page_number": 10, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "a839ef713de31c7a993e697b53db62f1", + "text": "World Nuclear Association Tower House 10 Southampton Street London WC2E 7HA United Kingdom", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 12 - }, - "text": "World Nuclear Association Tower House 10 Southampton Street London WC2E 7HA United Kingdom", - "type": "NarrativeText" - }, - { - "element_id": "7ef54c757ae752a9cc65e9908abad79d", - "metadata": { + "page_number": 12, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": 
"utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "7ef54c757ae752a9cc65e9908abad79d", + "text": "+44 (0)20 7451 1520 www.world-nuclear.org info@world-nuclear.org", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 12 - }, - "text": "+44 (0)20 7451 1520 www.world-nuclear.org info@world-nuclear.org", - "type": "NarrativeText" - }, - { - "element_id": "81ad08c457e3d3dfc6e16eaf515f8529", - "metadata": { + "page_number": 12, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "81ad08c457e3d3dfc6e16eaf515f8529", + "text": "World Nuclear Association is the international organization that represents the global nuclear industry. 
Its mission is to promote a wider understanding of nuclear energy among key international influencers by producing authoritative information, developing common industry positions, and contributing to the energy debate.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 12 - }, - "text": "World Nuclear Association is the international organization that represents the global nuclear industry. Its mission is to promote a wider understanding of nuclear energy among key international influencers by producing authoritative information, developing common industry positions, and contributing to the energy debate.", - "type": "NarrativeText" - }, - { - "element_id": "15e1605f58317fd91ce16b4c7de232de", - "metadata": { + "page_number": 12, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "Image", + "element_id": "15e1605f58317fd91ce16b4c7de232de", + "text": "contributing to the energy debate.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 12 - }, - "text": "contributing to the energy debate.", - "type": "Image" - }, - { - "element_id": "65f6ba3ba903bb4274220a1bc57979f5", - "metadata": { + "page_number": 12, "data_source": { - "date_modified": "2023-02-12T10:10:36+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", "record_locator": { "protocol": "s3", - 
"remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766" - }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "65f6ba3ba903bb4274220a1bc57979f5", + "text": "The Silent Giant \u00a9 2019 World Nuclear Association. Registered in England and Wales, company number 01215741", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 12 - }, - "text": "The Silent Giant © 2019 World Nuclear Association. Registered in England and Wales, company number 01215741", - "type": "NarrativeText" + "page_number": 12, + "data_source": { + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "8570bd087066350a84dd8d0ea86f11c6", + "record_locator": { + "protocol": "s3", + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" + }, + "date_created": "1676196636.0", + "date_modified": "1676196636.0" + } + } } ] \ No newline at end of file diff --git a/test_unstructured_ingest/expected-structured-output/s3/page-with-formula.pdf.json b/test_unstructured_ingest/expected-structured-output/s3/page-with-formula.json similarity index 57% rename from test_unstructured_ingest/expected-structured-output/s3/page-with-formula.pdf.json rename to test_unstructured_ingest/expected-structured-output/s3/page-with-formula.json index ea3413c43e..49147dbf2c 100644 --- a/test_unstructured_ingest/expected-structured-output/s3/page-with-formula.pdf.json +++ b/test_unstructured_ingest/expected-structured-output/s3/page-with-formula.json @@ -1,359 +1,376 @@ [ { + "type": "NarrativeText", "element_id": "7581b3e14a56c276896da707704c221e", + "text": "output values. 
These are concatenated and once again projected, resulting in the final values, as depicted in Figure 2.", "metadata": { - "data_source": { - "date_modified": "2023-10-17T23:20:41+00:00", - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf" - }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf", - "version": "322346180051831626890059520864532632042" - }, "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 1 - }, - "text": "output values. These are concatenated and once again projected, resulting in the final values, as depicted in Figure 2.", - "type": "NarrativeText" - }, - { - "element_id": "5f0b9e258d134a12434aaa080638e9de", - "metadata": { + "page_number": 1, "data_source": { - "date_modified": "2023-10-17T23:20:41+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf", + "version": "f2819db63e5aa2a3a6b8ba305f1911ea", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf", - "version": "322346180051831626890059520864532632042" - }, + "date_created": "1697584841.0", + "date_modified": "1697584841.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "5f0b9e258d134a12434aaa080638e9de", + "text": "Multi-head attention allows the model to jointly attend to information from different representation subspaces at different positions. With a single attention head, averaging inhibits this.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 1 - }, - "text": "Multi-head attention allows the model to jointly attend to information from different representation subspaces at different positions. 
With a single attention head, averaging inhibits this.", - "type": "NarrativeText" - }, - { - "element_id": "2f5b0b2ffa8872dde498f34cd4af6bd9", - "metadata": { + "page_number": 1, "data_source": { - "date_modified": "2023-10-17T23:20:41+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf", + "version": "f2819db63e5aa2a3a6b8ba305f1911ea", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf", - "version": "322346180051831626890059520864532632042" - }, + "date_created": "1697584841.0", + "date_modified": "1697584841.0" + } + } + }, + { + "type": "Formula", + "element_id": "2f5b0b2ffa8872dde498f34cd4af6bd9", + "text": "MultiHead(Q, K, V ) = Concat(head1, ..., headh)W O where headi = Attention(QW Q i , KW K i , V W V i )", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 1 - }, - "text": "MultiHead(Q, K, V ) = Concat(head1, ..., headh)W O where headi = Attention(QW Q i , KW K i , V W V i )", - "type": "Formula" - }, - { - "element_id": "703f1d4e9204c8b7ea94191f87138425", - "metadata": { + "page_number": 1, "data_source": { - "date_modified": "2023-10-17T23:20:41+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf", + "version": "f2819db63e5aa2a3a6b8ba305f1911ea", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf", - "version": "322346180051831626890059520864532632042" - }, + "date_created": "1697584841.0", + "date_modified": "1697584841.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "703f1d4e9204c8b7ea94191f87138425", + "text": "Where 
the projections are parameter matrices W Q and W O \u2208 Rhdv\u00d7dmodel. i \u2208 Rdmodel\u00d7dk , W K i \u2208 Rdmodel\u00d7dk , W V i \u2208 Rdmodel\u00d7dv", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 1 - }, - "text": "Where the projections are parameter matrices W Q and W O ∈ Rhdv×dmodel. i ∈ Rdmodel×dk , W K i ∈ Rdmodel×dk , W V i ∈ Rdmodel×dv", - "type": "NarrativeText" - }, - { - "element_id": "e3e4737377b1614b02426ccc77bdcfc3", - "metadata": { + "page_number": 1, "data_source": { - "date_modified": "2023-10-17T23:20:41+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf", + "version": "f2819db63e5aa2a3a6b8ba305f1911ea", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf", - "version": "322346180051831626890059520864532632042" - }, + "date_created": "1697584841.0", + "date_modified": "1697584841.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "e3e4737377b1614b02426ccc77bdcfc3", + "text": "In this work we employ h = 8 parallel attention layers, or heads. For each of these we use dk = dv = dmodel/h = 64. Due to the reduced dimension of each head, the total computational cost is similar to that of single-head attention with full dimensionality.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 1 - }, - "text": "In this work we employ h = 8 parallel attention layers, or heads. For each of these we use dk = dv = dmodel/h = 64. 
Due to the reduced dimension of each head, the total computational cost is similar to that of single-head attention with full dimensionality.", - "type": "NarrativeText" - }, - { - "element_id": "31e28cc49f5625cec5e262fbb4b7e5f0", - "metadata": { + "page_number": 1, "data_source": { - "date_modified": "2023-10-17T23:20:41+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf", + "version": "f2819db63e5aa2a3a6b8ba305f1911ea", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf", - "version": "322346180051831626890059520864532632042" - }, + "date_created": "1697584841.0", + "date_modified": "1697584841.0" + } + } + }, + { + "type": "Title", + "element_id": "31e28cc49f5625cec5e262fbb4b7e5f0", + "text": "3.2.3 Applications of Attention in our Model", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 1 - }, - "text": "3.2.3 Applications of Attention in our Model", - "type": "Title" - }, - { - "element_id": "f84e983da98f26bd5c141846aeffd0aa", - "metadata": { + "page_number": 1, "data_source": { - "date_modified": "2023-10-17T23:20:41+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf", + "version": "f2819db63e5aa2a3a6b8ba305f1911ea", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf", - "version": "322346180051831626890059520864532632042" - }, + "date_created": "1697584841.0", + "date_modified": "1697584841.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "f84e983da98f26bd5c141846aeffd0aa", + "text": "The Transformer uses multi-head attention 
in three different ways:", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 1 - }, - "text": "The Transformer uses multi-head attention in three different ways:", - "type": "NarrativeText" - }, - { - "element_id": "fd24bf7bf21b4aab2a36021f9ebb253b", - "metadata": { + "page_number": 1, "data_source": { - "date_modified": "2023-10-17T23:20:41+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf", + "version": "f2819db63e5aa2a3a6b8ba305f1911ea", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf", - "version": "322346180051831626890059520864532632042" - }, + "date_created": "1697584841.0", + "date_modified": "1697584841.0" + } + } + }, + { + "type": "ListItem", + "element_id": "fd24bf7bf21b4aab2a36021f9ebb253b", + "text": "\u2022 In \"encoder-decoder attention\" layers, the queries come from the previous decoder layer, and the memory keys and values come from the output of the encoder. This allows every position in the decoder to attend over all positions in the input sequence. This mimics the typical encoder-decoder attention mechanisms in sequence-to-sequence models such as [38, 2, 9].", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 1 - }, - "text": "• In \"encoder-decoder attention\" layers, the queries come from the previous decoder layer, and the memory keys and values come from the output of the encoder. This allows every position in the decoder to attend over all positions in the input sequence. 
This mimics the typical encoder-decoder attention mechanisms in sequence-to-sequence models such as [38, 2, 9].", - "type": "ListItem" - }, - { - "element_id": "77762865993fd26c55c87cb45d75cad8", - "metadata": { + "page_number": 1, "data_source": { - "date_modified": "2023-10-17T23:20:41+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf", + "version": "f2819db63e5aa2a3a6b8ba305f1911ea", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf", - "version": "322346180051831626890059520864532632042" - }, + "date_created": "1697584841.0", + "date_modified": "1697584841.0" + } + } + }, + { + "type": "ListItem", + "element_id": "77762865993fd26c55c87cb45d75cad8", + "text": "\u2022 The encoder contains self-attention layers. In a self-attention layer all of the keys, values and queries come from the same place, in this case, the output of the previous layer in the encoder. Each position in the encoder can attend to all positions in the previous layer of the encoder.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 1 - }, - "text": "• The encoder contains self-attention layers. In a self-attention layer all of the keys, values and queries come from the same place, in this case, the output of the previous layer in the encoder. 
Each position in the encoder can attend to all positions in the previous layer of the encoder.", - "type": "ListItem" - }, - { - "element_id": "41b9b9d2a4329a8f6075f4776403c2de", - "metadata": { + "page_number": 1, "data_source": { - "date_modified": "2023-10-17T23:20:41+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf", + "version": "f2819db63e5aa2a3a6b8ba305f1911ea", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf", - "version": "322346180051831626890059520864532632042" - }, + "date_created": "1697584841.0", + "date_modified": "1697584841.0" + } + } + }, + { + "type": "ListItem", + "element_id": "41b9b9d2a4329a8f6075f4776403c2de", + "text": "\u2022 Similarly, self-attention layers in the decoder allow each position in the decoder to attend to all positions in the decoder up to and including that position. We need to prevent leftward information flow in the decoder to preserve the auto-regressive property. We implement this inside of scaled dot-product attention by masking out (setting to \u2212\u221e) all values in the input of the softmax which correspond to illegal connections. See Figure 2.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 1 - }, - "text": "• Similarly, self-attention layers in the decoder allow each position in the decoder to attend to all positions in the decoder up to and including that position. We need to prevent leftward information flow in the decoder to preserve the auto-regressive property. We implement this inside of scaled dot-product attention by masking out (setting to −∞) all values in the input of the softmax which correspond to illegal connections. 
See Figure 2.", - "type": "ListItem" - }, - { - "element_id": "3b1f6da814e3826309b614d8b8dc9266", - "metadata": { + "page_number": 1, "data_source": { - "date_modified": "2023-10-17T23:20:41+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf", + "version": "f2819db63e5aa2a3a6b8ba305f1911ea", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf", - "version": "322346180051831626890059520864532632042" - }, + "date_created": "1697584841.0", + "date_modified": "1697584841.0" + } + } + }, + { + "type": "Title", + "element_id": "3b1f6da814e3826309b614d8b8dc9266", + "text": "3.3 Position-wise Feed-Forward Networks", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 1 - }, - "text": "3.3 Position-wise Feed-Forward Networks", - "type": "Title" - }, - { - "element_id": "46bb05e8d9c19147942fb75345ae3dbb", - "metadata": { + "page_number": 1, "data_source": { - "date_modified": "2023-10-17T23:20:41+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf", + "version": "f2819db63e5aa2a3a6b8ba305f1911ea", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf", - "version": "322346180051831626890059520864532632042" - }, + "date_created": "1697584841.0", + "date_modified": "1697584841.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "46bb05e8d9c19147942fb75345ae3dbb", + "text": "In addition to attention sub-layers, each of the layers in our encoder and decoder contains a fully connected feed-forward network, which is applied to each position separately and 
identically. This consists of two linear transformations with a ReLU activation in between.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 1 - }, - "text": "In addition to attention sub-layers, each of the layers in our encoder and decoder contains a fully connected feed-forward network, which is applied to each position separately and identically. This consists of two linear transformations with a ReLU activation in between.", - "type": "NarrativeText" - }, - { - "element_id": "eda9b46d50730928c8437d6149e01a2b", - "metadata": { + "page_number": 1, "data_source": { - "date_modified": "2023-10-17T23:20:41+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf", + "version": "f2819db63e5aa2a3a6b8ba305f1911ea", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf", - "version": "322346180051831626890059520864532632042" - }, + "date_created": "1697584841.0", + "date_modified": "1697584841.0" + } + } + }, + { + "type": "Formula", + "element_id": "eda9b46d50730928c8437d6149e01a2b", + "text": "FFN(x) = max(0, xW1 + b1)W2 + b2 (2)", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 1 - }, - "text": "FFN(x) = max(0, xW1 + b1)W2 + b2 (2)", - "type": "Formula" - }, - { - "element_id": "43c1741dc91b5b67a03a726873df3be5", - "metadata": { + "page_number": 1, "data_source": { - "date_modified": "2023-10-17T23:20:41+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf", + "version": "f2819db63e5aa2a3a6b8ba305f1911ea", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": 
"s3://utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf", - "version": "322346180051831626890059520864532632042" - }, + "date_created": "1697584841.0", + "date_modified": "1697584841.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "43c1741dc91b5b67a03a726873df3be5", + "text": "While the linear transformations are the same across different positions, they use different parameters from layer to layer. Another way of describing this is as two convolutions with kernel size 1. The dimensionality of input and output is dmodel = 512, and the inner-layer has dimensionality df f = 2048.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 1 - }, - "text": "While the linear transformations are the same across different positions, they use different parameters from layer to layer. Another way of describing this is as two convolutions with kernel size 1. The dimensionality of input and output is dmodel = 512, and the inner-layer has dimensionality df f = 2048.", - "type": "NarrativeText" - }, - { - "element_id": "63fc763509dec0fa03ba8296e4b0616e", - "metadata": { + "page_number": 1, "data_source": { - "date_modified": "2023-10-17T23:20:41+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf", + "version": "f2819db63e5aa2a3a6b8ba305f1911ea", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf", - "version": "322346180051831626890059520864532632042" - }, + "date_created": "1697584841.0", + "date_modified": "1697584841.0" + } + } + }, + { + "type": "Title", + "element_id": "63fc763509dec0fa03ba8296e4b0616e", + "text": "3.4 Embeddings and Softmax", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 1 - }, - "text": "3.4 Embeddings and Softmax", - 
"type": "Title" - }, - { - "element_id": "ab8cefbb53c308302ee0e3c0c7ecfd25", - "metadata": { + "page_number": 1, "data_source": { - "date_modified": "2023-10-17T23:20:41+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf", + "version": "f2819db63e5aa2a3a6b8ba305f1911ea", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf", - "version": "322346180051831626890059520864532632042" - }, + "date_created": "1697584841.0", + "date_modified": "1697584841.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "ab8cefbb53c308302ee0e3c0c7ecfd25", + "text": "Similarly to other sequence transduction models, we use learned embeddings to convert the input tokens and output tokens to vectors of dimension dmodel. We also use the usual learned linear transfor- mation and softmax function to convert the decoder output to predicted next-token probabilities. In our model, we share the same weight matrix between the two embedding layers and the pre-softmax dmodel. linear transformation, similar to [30]. In the embedding layers, we multiply those weights by", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 1 - }, - "text": "Similarly to other sequence transduction models, we use learned embeddings to convert the input tokens and output tokens to vectors of dimension dmodel. We also use the usual learned linear transfor- mation and softmax function to convert the decoder output to predicted next-token probabilities. In our model, we share the same weight matrix between the two embedding layers and the pre-softmax dmodel. linear transformation, similar to [30]. 
In the embedding layers, we multiply those weights by", - "type": "NarrativeText" - }, - { - "element_id": "b45e24bb89196d4b50d76df531acfaf2", - "metadata": { + "page_number": 1, "data_source": { - "date_modified": "2023-10-17T23:20:41+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf", + "version": "f2819db63e5aa2a3a6b8ba305f1911ea", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf", - "version": "322346180051831626890059520864532632042" - }, + "date_created": "1697584841.0", + "date_modified": "1697584841.0" + } + } + }, + { + "type": "Footer", + "element_id": "b45e24bb89196d4b50d76df531acfaf2", + "text": "5", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 1 - }, - "text": "5", - "type": "Footer" + "page_number": 1, + "data_source": { + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf", + "version": "f2819db63e5aa2a3a6b8ba305f1911ea", + "record_locator": { + "protocol": "s3", + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" + }, + "date_created": "1697584841.0", + "date_modified": "1697584841.0" + } + } } ] \ No newline at end of file diff --git a/test_unstructured_ingest/expected-structured-output/s3/recalibrating-risk-report.pdf.json b/test_unstructured_ingest/expected-structured-output/s3/recalibrating-risk-report.json similarity index 60% rename from test_unstructured_ingest/expected-structured-output/s3/recalibrating-risk-report.pdf.json rename to test_unstructured_ingest/expected-structured-output/s3/recalibrating-risk-report.json index dd896a237c..c614618094 100644 --- a/test_unstructured_ingest/expected-structured-output/s3/recalibrating-risk-report.pdf.json +++ 
b/test_unstructured_ingest/expected-structured-output/s3/recalibrating-risk-report.json @@ -1,1850 +1,1938 @@ [ { + "type": "Title", "element_id": "614b7a52d42e8e3b66edf4943093c85c", + "text": "WORLD ASSOCIATION", "metadata": { - "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" - }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 1 - }, - "text": "WORLD ASSOCIATION", - "type": "Title" - }, - { - "element_id": "4ab4d4df6aeb3d4fb6d8102edd876ab8", - "metadata": { + "page_number": 1, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "Image", + "element_id": "4ab4d4df6aeb3d4fb6d8102edd876ab8", + "text": "", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 1 - }, - "text": "", - "type": "Image" - }, - { - "element_id": "7137c1e14141fad3ad306fe68918a967", - "metadata": { + "page_number": 1, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": 
"utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "7137c1e14141fad3ad306fe68918a967", + "text": "Recalibrating risk", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 1 - }, - "text": "Recalibrating risk", - "type": "NarrativeText" - }, - { - "element_id": "dbdc2d6c6381e4fa1c7b8058bf86abef", - "metadata": { + "page_number": 1, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "dbdc2d6c6381e4fa1c7b8058bf86abef", + "text": "Putting nuclear risk in context and perspective", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 1 - }, - "text": "Putting nuclear risk in context and perspective", - "type": "NarrativeText" - }, - { - "element_id": "f71e85ddcaf37c2df39af496a16c23ab", - "metadata": { + "page_number": 1, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { 
"protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "f71e85ddcaf37c2df39af496a16c23ab", + "text": "\u00a9 2021 World Nuclear Association", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 2 - }, - "text": "© 2021 World Nuclear Association", - "type": "NarrativeText" - }, - { - "element_id": "24a8636658e8d7b70af1f0536d494159", - "metadata": { + "page_number": 2, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "24a8636658e8d7b70af1f0536d494159", + "text": "Registered in England and Wales, company number 01215741. This report represents the views of individual experts, but does not necessarily represent those of any of the World Nuclear Association\u2019s individual member organizations.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 2 - }, - "text": "Registered in England and Wales, company number 01215741. 
This report represents the views of individual experts, but does not necessarily represent those of any of the World Nuclear Association’s individual member organizations.", - "type": "NarrativeText" - }, - { - "element_id": "a301376c55ac727652cf954ec5e913d6", - "metadata": { + "page_number": 2, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "Title", + "element_id": "a301376c55ac727652cf954ec5e913d6", + "text": "Executive Summary", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 3 - }, - "text": "Executive Summary", - "type": "Title" - }, - { - "element_id": "5b2ec7692e73027141163ac9031623f5", - "metadata": { + "page_number": 3, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "5b2ec7692e73027141163ac9031623f5", + "text": "Nuclear energy is 
crucial to meeting the world\u2019s ever-increasing demand for energy, thanks to its ability to supply affordable, reliable, and sustainable electricity and heat. Despite the many benefits of nuclear energy, its deployment is hindered in some parts of the world due to long-standing misconceptions about its risks. Even with its safety record \u2013 unmatched by any other energy source \u2013 the perception of nuclear power as uniquely dangerous endures.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 3 - }, - "text": "Nuclear energy is crucial to meeting the world’s ever-increasing demand for energy, thanks to its ability to supply affordable, reliable, and sustainable electricity and heat. Despite the many benefits of nuclear energy, its deployment is hindered in some parts of the world due to long-standing misconceptions about its risks. Even with its safety record – unmatched by any other energy source – the perception of nuclear power as uniquely dangerous endures.", - "type": "NarrativeText" - }, - { - "element_id": "cb2f79944be60fd4447f31653abc1c33", - "metadata": { + "page_number": 3, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "cb2f79944be60fd4447f31653abc1c33", + "text": "This is reflected in the regulatory burden placed on the nuclear industry, which is geared towards an \u201cas low as possible\u201d 
approach, demanding radiation levels to be far below the levels where health effects have been observed (and in many cases below natural background radiation). This has resulted in higher costs, without delivering any additional health benefits, and has resulted in policymakers choosing other, more risky energy sources. More often than not, those alternative energy sources have been fossil fuels, greatly exacerbating the well-known risks posed by air pollution and climate change.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 3 - }, - "text": "This is reflected in the regulatory burden placed on the nuclear industry, which is geared towards an “as low as possible” approach, demanding radiation levels to be far below the levels where health effects have been observed (and in many cases below natural background radiation). This has resulted in higher costs, without delivering any additional health benefits, and has resulted in policymakers choosing other, more risky energy sources. 
More often than not, those alternative energy sources have been fossil fuels, greatly exacerbating the well-known risks posed by air pollution and climate change.", - "type": "NarrativeText" - }, - { - "element_id": "edd5a77bbaec194649c3e909359778dc", - "metadata": { + "page_number": 3, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "edd5a77bbaec194649c3e909359778dc", + "text": "Expanding the use of nuclear energy is essential for solving some of the biggest challenges facing humanity. Nuclear power has already played a major role in avoiding the emission of air pollutants and greenhouse gases, a role that will have to be greatly expanded in the future to ensure global energy supplies are decarbonized by 2050. Nuclear energy will also play a major part in ensuring that the transition to a low-carbon future is done in an equitable fashion, providing people across the world with a high-powered and sustainable future.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 3 - }, - "text": "Expanding the use of nuclear energy is essential for solving some of the biggest challenges facing humanity. Nuclear power has already played a major role in avoiding the emission of air pollutants and greenhouse gases, a role that will have to be greatly expanded in the future to ensure global energy supplies are decarbonized by 2050. 
Nuclear energy will also play a major part in ensuring that the transition to a low-carbon future is done in an equitable fashion, providing people across the world with a high-powered and sustainable future.", - "type": "NarrativeText" - }, - { - "element_id": "0176a09ce730c661b03e05b873f406e0", - "metadata": { + "page_number": 3, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "0176a09ce730c661b03e05b873f406e0", + "text": "In order to fully unlock the potential of the atom, it is crucial that the gap between perceived and actual risks is addressed. The window of opportunity to act on climate change and other global challenges is closing fast \u2013 we must not delay increasing the contribution of nuclear energy on the grounds of myths and misconceptions.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 3 - }, - "text": "In order to fully unlock the potential of the atom, it is crucial that the gap between perceived and actual risks is addressed. 
The window of opportunity to act on climate change and other global challenges is closing fast – we must not delay increasing the contribution of nuclear energy on the grounds of myths and misconceptions.", - "type": "NarrativeText" - }, - { - "element_id": "f6475defefd93d0b101032386c3904b9", - "metadata": { + "page_number": 3, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "f6475defefd93d0b101032386c3904b9", + "text": "Therefore, World Nuclear Association calls upon policymakers and regulators to adopt an all-hazards approach, where different risks associated with energy producing technologies are placed in perspective and the appropriate context, and examined in line with the latest scientific evidence. Policymakers and regulators must ensure that their decisions regarding radiation protection do not create greater risks elsewhere. 
This include the recalibration of existing regulations regarding nuclear power and radiation, weighing the cost of regulatory measures against the societal benefits provided by nuclear energy.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 3 - }, - "text": "Therefore, World Nuclear Association calls upon policymakers and regulators to adopt an all-hazards approach, where different risks associated with energy producing technologies are placed in perspective and the appropriate context, and examined in line with the latest scientific evidence. Policymakers and regulators must ensure that their decisions regarding radiation protection do not create greater risks elsewhere. This include the recalibration of existing regulations regarding nuclear power and radiation, weighing the cost of regulatory measures against the societal benefits provided by nuclear energy.", - "type": "NarrativeText" - }, - { - "element_id": "0e8aeb80f607db6cdf4f1fbcfc095048", - "metadata": { + "page_number": 3, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "Image", + "element_id": "0e8aeb80f607db6cdf4f1fbcfc095048", + "text": "", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 3 - }, - "text": "", - "type": "Image" - }, - { - "element_id": "14926217ad635f3b0d8aa6a1c084ffae", - "metadata": { + "page_number": 3, "data_source": { - 
"date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "Footer", + "element_id": "14926217ad635f3b0d8aa6a1c084ffae", + "text": "1", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 3 - }, - "text": "1", - "type": "Footer" - }, - { - "element_id": "d7d7ca3be3dd09816ae5d0294281e8d9", - "metadata": { + "page_number": 3, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "Header", + "element_id": "d7d7ca3be3dd09816ae5d0294281e8d9", + "text": "2", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 4 - }, - "text": "2", - "type": "Header" - }, - { - "element_id": "ca878cdd15c9e34463e380dfcb994c2d", - "metadata": { + "page_number": 4, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": 
"s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "Title", + "element_id": "ca878cdd15c9e34463e380dfcb994c2d", + "text": "Perceived versus actual risk", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 4 - }, - "text": "Perceived versus actual risk", - "type": "Title" - }, - { - "element_id": "7a6ffe00fc921772e62d72d8b36ae72d", - "metadata": { + "page_number": 4, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "7a6ffe00fc921772e62d72d8b36ae72d", + "text": "It is widely accepted that humans have skewed perceptions of risks, and the way we respond to them is shaped by these perceptions, rather than the actual threats posed. 
Approximately 1.35 millioni people die every year because of traffic accidents, in comparison with 257 aviation fatalities in 2019ii, yet more people are nervous about flying, fearing a rare deadly crash, than being in a fatal traffic accident. These numbers tell a powerful and well-established story: evaluations of risk are largely the result of emotions, rather than logic or facts. Although it is hard to recognize and accept that our perceptions may mislead us and curtail effective decision making, this is a well-established characteristic of humanity.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 4 - }, - "text": "It is widely accepted that humans have skewed perceptions of risks, and the way we respond to them is shaped by these perceptions, rather than the actual threats posed. Approximately 1.35 millioni people die every year because of traffic accidents, in comparison with 257 aviation fatalities in 2019ii, yet more people are nervous about flying, fearing a rare deadly crash, than being in a fatal traffic accident. These numbers tell a powerful and well-established story: evaluations of risk are largely the result of emotions, rather than logic or facts. 
Although it is hard to recognize and accept that our perceptions may mislead us and curtail effective decision making, this is a well-established characteristic of humanity.", - "type": "NarrativeText" - }, - { - "element_id": "31ed39cf3f959ddf86d3eba65cb79a01", - "metadata": { + "page_number": 4, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "31ed39cf3f959ddf86d3eba65cb79a01", + "text": "Nuclear energy and the risk of radiation is one of the most extreme cases in which perceived and actual risks have diverged. The fear of radiation, whilst pre- dating the Second World War, was firmly established by the debate on the potential impacts of low-dose radiation from the fallout from nuclear weapons testing in the early years of the Cold War. Radiation in many ways became linked with the mental imagery of nuclear war, playing an important role in increasing public concern about radiation and its health effects. There is a well-established discrepancy between fact-based risk assessments and public perception of different risks. This is very much the case with nuclear power, and this is clearly highlighted in Figure 1, with laypersons ranking nuclear power as the highest risk out of 30 activities and technologies, with experts ranking nuclear as 20th. 
In many ways, popular culture\u2019s depiction of radiation has played a role in ensuring that this discrepancy has remained, be it Godzilla, The Incredible Hulk, or The Simpsons, which regularly plays on the notion of radiation from nuclear power plants causing three-eyed fish, something that has been firmly rejected as unscientific.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 4 - }, - "text": "Nuclear energy and the risk of radiation is one of the most extreme cases in which perceived and actual risks have diverged. The fear of radiation, whilst pre- dating the Second World War, was firmly established by the debate on the potential impacts of low-dose radiation from the fallout from nuclear weapons testing in the early years of the Cold War. Radiation in many ways became linked with the mental imagery of nuclear war, playing an important role in increasing public concern about radiation and its health effects. There is a well-established discrepancy between fact-based risk assessments and public perception of different risks. This is very much the case with nuclear power, and this is clearly highlighted in Figure 1, with laypersons ranking nuclear power as the highest risk out of 30 activities and technologies, with experts ranking nuclear as 20th. 
In many ways, popular culture’s depiction of radiation has played a role in ensuring that this discrepancy has remained, be it Godzilla, The Incredible Hulk, or The Simpsons, which regularly plays on the notion of radiation from nuclear power plants causing three-eyed fish, something that has been firmly rejected as unscientific.", - "type": "NarrativeText" - }, - { - "element_id": "a66214340855880a5393384d1363511c", - "metadata": { + "page_number": 4, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "Title", + "element_id": "a66214340855880a5393384d1363511c", + "text": "Rank Order Laypersons", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 4 - }, - "text": "Rank Order Laypersons", - "type": "Title" - }, - { - "element_id": "9512f477364e1da1fa60dbd237c41f85", - "metadata": { + "page_number": 4, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": 
"1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "Table", + "element_id": "9512f477364e1da1fa60dbd237c41f85", + "text": "Experts 1 20 Nuclear power Motor vehicles 2 1 4 3 Handguns 2 4 Smoking Electric power (non-nuclear) 9 17 22 7 X-rays 25 30 Vaccinations", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 4 - }, - "text": "Experts 1 20 Nuclear power Motor vehicles 2 1 4 3 Handguns 2 4 Smoking Electric power (non-nuclear) 9 17 22 7 X-rays 25 30 Vaccinations", - "type": "Table" - }, - { - "element_id": "3ff36869cefb14183f0955094a908fc5", - "metadata": { + "page_number": 4, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "3ff36869cefb14183f0955094a908fc5", + "text": "In reality, radiation is a natural part of life; indeed, we are all exposed to radiation every day, on average receiving 2-3 millisieverts (mSv) per year. Most of this radiation is naturally occurring, with radon gas from the ground being the main source of exposure. The nuclear industry is responsible for a very small part of radiation exposure to the public, as seen in Figure 2. To put this into perspective, eating 10 bananas or two Brazil nuts results in the same radiation dose as living nearby a nuclear power plant for a year. 
Humans are also naturally radioactive, and the radiation dose from sleeping next to someone else each night for a year is ten times higher than the exposure from living nearby a nuclear power plant for the same time span.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 4 - }, - "text": "In reality, radiation is a natural part of life; indeed, we are all exposed to radiation every day, on average receiving 2-3 millisieverts (mSv) per year. Most of this radiation is naturally occurring, with radon gas from the ground being the main source of exposure. The nuclear industry is responsible for a very small part of radiation exposure to the public, as seen in Figure 2. To put this into perspective, eating 10 bananas or two Brazil nuts results in the same radiation dose as living nearby a nuclear power plant for a year. Humans are also naturally radioactive, and the radiation dose from sleeping next to someone else each night for a year is ten times higher than the exposure from living nearby a nuclear power plant for the same time span.", - "type": "NarrativeText" - }, - { - "element_id": "773667f6d4fbe19cc347ace06ca3664e", - "metadata": { + "page_number": 4, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "773667f6d4fbe19cc347ace06ca3664e", + "text": "In fact, scientific consensus is that when it comes to preventing 
exposure to radiation, nuclear power is much better than other electricity generators. A 2016 reportiii from the United Nations Scientific Committee on the Effects of Atomic Radiation (UNSCEAR) found that coal-generated electricity is responsible for more than half of the total global radiation exposure arising from electricity generation, while nuclear power contributed less than a fifth. Coal miners received high occupational exposure and workers in solar and wind farms received the highest occupational exposure associated with plant construction for the same amount of installed capacity.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 4 - }, - "text": "In fact, scientific consensus is that when it comes to preventing exposure to radiation, nuclear power is much better than other electricity generators. A 2016 reportiii from the United Nations Scientific Committee on the Effects of Atomic Radiation (UNSCEAR) found that coal-generated electricity is responsible for more than half of the total global radiation exposure arising from electricity generation, while nuclear power contributed less than a fifth. 
Coal miners received high occupational exposure and workers in solar and wind farms received the highest occupational exposure associated with plant construction for the same amount of installed capacity.", - "type": "NarrativeText" - }, - { - "element_id": "6dab7cb99fa308838e8c0413caccb7f1", - "metadata": { + "page_number": 4, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, - "filetype": "application/pdf", - "languages": [ - "eng" - ], - "page_number": 4 - }, - "text": "1 The original study was published in 1978, but its findings have been confirmed by numerous studies since.", - "type": "NarrativeText" + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } }, { - "element_id": "57a9b2172894596e88b48caac276416d", + "type": "NarrativeText", + "element_id": "6dab7cb99fa308838e8c0413caccb7f1", + "text": "1 The original study was published in 1978, but its findings have been confirmed by numerous studies since.", "metadata": { - "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" - }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 5 - }, - "text": "Natural Artificial 48% Radon 14% Buildings & soil 12% Food & water 10% Cosmic 4% Thoron 11% Medicine 0.4% 
0.4% Miscellaneous 0.2% Occupational 0.04% Nuclear discharges Fallout ", - "type": "Image" - }, - { - "element_id": "9add7e245b0191530386fcf00bb218df", - "metadata": { + "page_number": 4, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "Image", + "element_id": "57a9b2172894596e88b48caac276416d", + "text": "Natural Artificial 48% Radon 14% Buildings & soil 12% Food & water 10% Cosmic 4% Thoron 11% Medicine 0.4% 0.4% Miscellaneous 0.2% Occupational 0.04% Nuclear discharges Fallout ", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 5 - }, - "text": "Figure 2. 
Global average exposure from different sources of radiation", - "type": "FigureCaption" - }, - { - "element_id": "e9a9edcbe70d12be56e6947010012f76", - "metadata": { + "page_number": 5, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "FigureCaption", + "element_id": "9add7e245b0191530386fcf00bb218df", + "text": "Figure 2. Global average exposure from different sources of radiation", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 5 - }, - "text": "Fossil fuels – currently accounting for around 81% of total energy supplyiv – cause significant levels of emissions in terms of both greenhouse gases and air pollutants. Despite the serious and ongoing health and environmental harms caused by air pollution, it is often considered to be an inevitable consequence of economic development. Air pollution’s contribution to the burden of disease is profound, with an estimated 8.7 million people dying worldwide prematurely in 2018 alonev,vi. 
Despite this, it fails to induce the same fears and anxieties in people as nuclear energy does.", - "type": "NarrativeText" - }, - { - "element_id": "b8ec2beffbf0ae3c620edbc5288b5e7f", - "metadata": { + "page_number": 5, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "e9a9edcbe70d12be56e6947010012f76", + "text": "Fossil fuels \u2013 currently accounting for around 81% of total energy supplyiv \u2013 cause significant levels of emissions in terms of both greenhouse gases and air pollutants. Despite the serious and ongoing health and environmental harms caused by air pollution, it is often considered to be an inevitable consequence of economic development. Air pollution\u2019s contribution to the burden of disease is profound, with an estimated 8.7 million people dying worldwide prematurely in 2018 alonev,vi. Despite this, it fails to induce the same fears and anxieties in people as nuclear energy does.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 5 - }, - "text": "In terms of accidents, hydropower is the deadliest electricity generator, mostly due to collapsing dams and the consequences of flooding. The Banqiao Dam failure in 1975 led to at least 26,000 people drowning, and as many as 150,000 deaths resulting from the secondary effects of the accident. 
In comparison, radiation exposure following Chernobyl caused 54 deaths2, while no casualties due to radiation are likely to occur from the accident at Fukushima Daiichi.", - "type": "NarrativeText" - }, - { - "element_id": "1b93c33208a85ba6d2a69d23babd6def", - "metadata": { + "page_number": 5, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "b8ec2beffbf0ae3c620edbc5288b5e7f", + "text": "In terms of accidents, hydropower is the deadliest electricity generator, mostly due to collapsing dams and the consequences of flooding. The Banqiao Dam failure in 1975 led to at least 26,000 people drowning, and as many as 150,000 deaths resulting from the secondary effects of the accident. 
In comparison, radiation exposure following Chernobyl caused 54 deaths2, while no casualties due to radiation are likely to occur from the accident at Fukushima Daiichi.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 5 - }, - "text": "25 24.6 20 18.4 e 15 10 5 4.6 2.8 0 C oal Oil Bio m ass N atural gas 0.07 Wind 0.04 H ydropo w er 0.02 S olar 0.01 N uclear ", - "type": "Image" - }, - { - "element_id": "6c9fe7851d0f4e06c5ec939f53dbce3b", - "metadata": { + "page_number": 5, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "Image", + "element_id": "1b93c33208a85ba6d2a69d23babd6def", + "text": "25 24.6 20 18.4 e 15 10 5 4.6 2.8 0 C oal Oil Bio m ass N atural gas 0.07 Wind 0.04 H ydropo w er 0.02 S olar 0.01 N uclear ", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 5 - }, - "text": "r a e y", - "type": "NarrativeText" - }, - { - "element_id": "a70b649d3f49fafd8a15a6617364bd69", - "metadata": { + "page_number": 5, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": 
"s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "6c9fe7851d0f4e06c5ec939f53dbce3b", + "text": "r a e y", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 5 - }, - "text": "W T", - "type": "Title" - }, - { - "element_id": "26963be98ae7ff2e8c0428862d074cf6", - "metadata": { + "page_number": 5, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "Title", + "element_id": "a70b649d3f49fafd8a15a6617364bd69", + "text": "W T", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 5 - }, - "text": "r e p s e i t i l", - "type": "NarrativeText" - }, - { - "element_id": "b69c60ea2e3fa24e25a069f90ee4b696", - "metadata": { + "page_number": 5, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - 
"version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "26963be98ae7ff2e8c0428862d074cf6", + "text": "r e p s e i t i l", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 5 - }, - "text": "a t a F", - "type": "Title" - }, - { - "element_id": "af241d12aef0f51bace400db4e14649d", - "metadata": { + "page_number": 5, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "Title", + "element_id": "b69c60ea2e3fa24e25a069f90ee4b696", + "text": "a t a F", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 5 - }, - "text": "Figure 3. 
Comparison of number of fatalities due to electricity generation, including accidents and air pollution3", - "type": "FigureCaption" - }, - { - "element_id": "1e41836d6d7be638be9c0de0ce2c2256", - "metadata": { + "page_number": 5, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "FigureCaption", + "element_id": "af241d12aef0f51bace400db4e14649d", + "text": "Figure 3. Comparison of number of fatalities due to electricity generation, including accidents and air pollution3", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 5 - }, - "text": "Contrary to perceptions, nuclear is an incredibly safe source of energy (see Figure 3 for a comparison). 
What is also clear is that the continued use of alternative energy sources in preference to nuclear energy – in particular fossil fuels – poses a far greater risk to public health by significantly contributing to climate change and air pollution.", - "type": "NarrativeText" - }, - { - "element_id": "59b04b23d82c7c013abd6477a14c9425", - "metadata": { + "page_number": 5, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "1e41836d6d7be638be9c0de0ce2c2256", + "text": "Contrary to perceptions, nuclear is an incredibly safe source of energy (see Figure 3 for a comparison). What is also clear is that the continued use of alternative energy sources in preference to nuclear energy \u2013 in particular fossil fuels \u2013 poses a far greater risk to public health by significantly contributing to climate change and air pollution.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 5 - }, - "text": "2 Including 28 firefighters that were exposed to lethal amounts of radiation during the accident night, and 15 fatal cases of thyroid cancer. 3 Sources drawn upon: Markandya, A., & Wilkinson, P. (2007), Sovacool et al. (2016). 
Data for nuclear accidents modified to reflect the 2012 UNSCEAR report and the 2015 US NRC SOARCA study.", - "type": "ListItem" - }, - { - "element_id": "9a8b3b64f6d252a6d31d87d306952ca2", - "metadata": { + "page_number": 5, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "ListItem", + "element_id": "59b04b23d82c7c013abd6477a14c9425", + "text": "2 Including 28 firefighters that were exposed to lethal amounts of radiation during the accident night, and 15 fatal cases of thyroid cancer. 3 Sources drawn upon: Markandya, A., & Wilkinson, P. (2007), Sovacool et al. (2016). 
Data for nuclear accidents modified to reflect the 2012 UNSCEAR report and the 2015 US NRC SOARCA study.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 5 - }, - "text": "3", - "type": "Header" - }, - { - "element_id": "e754a2849dac122e7d2e05447f0da512", - "metadata": { + "page_number": 5, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "Header", + "element_id": "9a8b3b64f6d252a6d31d87d306952ca2", + "text": "3", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 6 - }, - "text": "4", - "type": "Header" - }, - { - "element_id": "21b4c32e6d360d1d70e59dad888e306d", - "metadata": { + "page_number": 5, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "Header", + "element_id": "e754a2849dac122e7d2e05447f0da512", + "text": "4", + "metadata": 
{ "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 6 - }, - "text": "The low-dose question", - "type": "Title" - }, - { - "element_id": "26e60e901d12cbb5efb851fe945a3f96", - "metadata": { + "page_number": 6, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "Title", + "element_id": "21b4c32e6d360d1d70e59dad888e306d", + "text": "The low-dose question", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 6 - }, - "text": "Since the 1950s, the Linear No-Threshold (LNT) theory has been used to inform regulatory decisions, positing that any dose of radiation, regardless of the amount or the duration over which it is received, poses a risk. Assuming that LNT is correct, we should expect to see that people living in areas of the world where background doses are higher (e.g. India, Iran and northern Europe) have a higher incidence of cancer. However, despite people living in areas of the world where radiation doses are naturally higher than those that would be received in parts of the evacuation zones around Chernobyl and Fukushima Daiichi, there is no evidence that these populations exhibit any negative health effects. Living nearby a nuclear power plant on average exposes the local population to 0.00009mSv/year, which according to LNT would increase the risk of developing cancer by 0.00000045%. 
After Chernobyl, the average dose to those evacuated was 30mSv, which would theoretically increase the risk of cancer at some point in their lifetime by 0.15% (on top of the average baseline lifetime risk of cancer, which is 39.5% in the USviii, 50% in the UKix).", - "type": "NarrativeText" - }, - { - "element_id": "31d07d8c2dce96dc1c6daa38f8597ab5", - "metadata": { + "page_number": 6, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "26e60e901d12cbb5efb851fe945a3f96", + "text": "Since the 1950s, the Linear No-Threshold (LNT) theory has been used to inform regulatory decisions, positing that any dose of radiation, regardless of the amount or the duration over which it is received, poses a risk. Assuming that LNT is correct, we should expect to see that people living in areas of the world where background doses are higher (e.g. India, Iran and northern Europe) have a higher incidence of cancer. However, despite people living in areas of the world where radiation doses are naturally higher than those that would be received in parts of the evacuation zones around Chernobyl and Fukushima Daiichi, there is no evidence that these populations exhibit any negative health effects. Living nearby a nuclear power plant on average exposes the local population to 0.00009mSv/year, which according to LNT would increase the risk of developing cancer by 0.00000045%. 
After Chernobyl, the average dose to those evacuated was 30mSv, which would theoretically increase the risk of cancer at some point in their lifetime by 0.15% (on top of the average baseline lifetime risk of cancer, which is 39.5% in the USviii, 50% in the UKix).", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 6 - }, - "text": "Since the 1980s, there has been considerable scientific debate as to whether the LNT theory is valid, following scientific breakthroughs within, for example, radiobiology and medicine. Indeed, the Chernobyl accident helped illuminate some of the issues associated with LNT. Multiplication of the low doses after the accident (many far too low to be of any health concern) with large populations – using the assumptions made by LNT – led to a large number of predicted cancer deaths, which have not, and likely will not materialize. This practice has been heavily criticized for being inappropriate in making risk assessments by UNSCEAR, the International Commission on Radiation Protection and a large number of independent scientists.", - "type": "NarrativeText" - }, - { - "element_id": "4fb06aef292d07a36339c830eb23c8b5", - "metadata": { + "page_number": 6, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "31d07d8c2dce96dc1c6daa38f8597ab5", + "text": "Since the 1980s, there has been considerable 
scientific debate as to whether the LNT theory is valid, following scientific breakthroughs within, for example, radiobiology and medicine. Indeed, the Chernobyl accident helped illuminate some of the issues associated with LNT. Multiplication of the low doses after the accident (many far too low to be of any health concern) with large populations \u2013 using the assumptions made by LNT \u2013 led to a large number of predicted cancer deaths, which have not, and likely will not materialize. This practice has been heavily criticized for being inappropriate in making risk assessments by UNSCEAR, the International Commission on Radiation Protection and a large number of independent scientists.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 6 - }, - "text": "Determining the precise risk (or lack thereof) of the extremely small radiation doses associated with the routine operations of nuclear power plants, the disposal of nuclear waste or even extremely rare nuclear accidents is a purely academic exercise, that tries to determine whether the risk is extremely low, too small to detect, or non- existent. 
The risks of low-level radiation pale in comparison to other societal risks such as obesity, smoking, and air pollution.", - "type": "NarrativeText" - }, - { - "element_id": "1d9fdadf74d73e63be2e683b0a73d86d", - "metadata": { + "page_number": 6, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "4fb06aef292d07a36339c830eb23c8b5", + "text": "Determining the precise risk (or lack thereof) of the extremely small radiation doses associated with the routine operations of nuclear power plants, the disposal of nuclear waste or even extremely rare nuclear accidents is a purely academic exercise, that tries to determine whether the risk is extremely low, too small to detect, or non- existent. The risks of low-level radiation pale in comparison to other societal risks such as obesity, smoking, and air pollution.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 6 - }, - "text": "By looking at radiation risks in isolation, we prolong the over-regulation of radiation in nuclear plants, driving up costs, whilst not delivering any additional health benefits, in turn incentivising the use of more harmful energy sources. 
A recalibration is required, and this can only done by ensuring a holistic approach to risk is taken.", - "type": "NarrativeText" - }, - { - "element_id": "31e685f786964a09b167a0fb68c01973", - "metadata": { + "page_number": 6, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "1d9fdadf74d73e63be2e683b0a73d86d", + "text": "By looking at radiation risks in isolation, we prolong the over-regulation of radiation in nuclear plants, driving up costs, whilst not delivering any additional health benefits, in turn incentivising the use of more harmful energy sources. 
A recalibration is required, and this can only done by ensuring a holistic approach to risk is taken.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 6 - }, - "text": "", - "type": "Image" - }, - { - "element_id": "d3b1c14b00a459af4d32459678203328", - "metadata": { + "page_number": 6, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "Image", + "element_id": "31e685f786964a09b167a0fb68c01973", + "text": "", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 7 - }, - "text": "Adopting an all-hazards approach", - "type": "Title" - }, - { - "element_id": "4ab8051907907017f78ae83e1b95dc07", - "metadata": { + "page_number": 6, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "Title", + "element_id": "d3b1c14b00a459af4d32459678203328", + 
"text": "Adopting an all-hazards approach", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 7 - }, - "text": "Contemporary debates around nuclear energy often reflect the precautionary principle, a problematic concept applied across a range of regulatory and policy issues. A ‘strong’ interpretation of the precautionary principle, or a ‘as low as possible’ approach to risk, dictates that regulation is required whenever there is a potential adverse health risk, even if the evidence is not certain and regardless of the cost of regulation.", - "type": "NarrativeText" - }, - { - "element_id": "bac4e8745150c829821f40387f9107c6", - "metadata": { + "page_number": 7, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "4ab8051907907017f78ae83e1b95dc07", + "text": "Contemporary debates around nuclear energy often reflect the precautionary principle, a problematic concept applied across a range of regulatory and policy issues. 
A \u2018strong\u2019 interpretation of the precautionary principle, or a \u2018as low as possible\u2019 approach to risk, dictates that regulation is required whenever there is a potential adverse health risk, even if the evidence is not certain and regardless of the cost of regulation.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 7 - }, - "text": "The overall regulatory philosophy, at least theoretically, used in the nuclear industry is the ALARA (As Low As Reasonably Achievable) principle, where any regulatory action on radiation should account for socio- economic benefits and costs, as opposed to making decisions based on radiation risks alone.", - "type": "NarrativeText" - }, - { - "element_id": "4ea8f70af2d39cae9f63fce0ea7165f1", - "metadata": { + "page_number": 7, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "bac4e8745150c829821f40387f9107c6", + "text": "The overall regulatory philosophy, at least theoretically, used in the nuclear industry is the ALARA (As Low As Reasonably Achievable) principle, where any regulatory action on radiation should account for socio- economic benefits and costs, as opposed to making decisions based on radiation risks alone.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 7 - }, - "text": "However, the regulatory process and the 
policy debate around nuclear more broadly has long departed from the ALARA principle, no longer weighing cost versus benefits, or considering the overall advantages of nuclear energy, but rather looking at radiation in isolation. This has resulted in a subtle shift towards an ‘as low as possible’ mentality. Attempting to reduce radiation far below de facto safe levels has resulted in an escalation of costs and loss of public confidence, and in some cases has deprived communities of the many benefits nuclear energy provides. In practical terms, this has led to the continued use of more harmful energy sources, such as fossil fuels.", - "type": "NarrativeText" - }, - { - "element_id": "17f00667e4e913d53d1deec4900e13f2", - "metadata": { + "page_number": 7, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "4ea8f70af2d39cae9f63fce0ea7165f1", + "text": "However, the regulatory process and the policy debate around nuclear more broadly has long departed from the ALARA principle, no longer weighing cost versus benefits, or considering the overall advantages of nuclear energy, but rather looking at radiation in isolation. This has resulted in a subtle shift towards an \u2018as low as possible\u2019 mentality. 
Attempting to reduce radiation far below de facto safe levels has resulted in an escalation of costs and loss of public confidence, and in some cases has deprived communities of the many benefits nuclear energy provides. In practical terms, this has led to the continued use of more harmful energy sources, such as fossil fuels.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 7 - }, - "text": "If the potential of nuclear energy is to be fully realized, public health and safety approaches must be recalibrated to consider a wider range of factors when considering radiation, adopting an “all-hazards” approach. Such an approach must ensure that risks are placed within a proper perspective and context, rather than looking at them in isolation. We therefore must not look at the costs – be they economic, environmental, or public health – associated with an individual power plant in isolation, but rather the costs associated with it (and its alternatives) at a societal level (Figure 4). 
This would entail looking at the potential risks arising from the use of nuclear power and comparing these with the risks associated with not adopting nuclear power.", - "type": "NarrativeText" - }, - { - "element_id": "dc83c2d2395c30a8785a2533424f1c72", - "metadata": { + "page_number": 7, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "17f00667e4e913d53d1deec4900e13f2", + "text": "If the potential of nuclear energy is to be fully realized, public health and safety approaches must be recalibrated to consider a wider range of factors when considering radiation, adopting an \u201call-hazards\u201d approach. Such an approach must ensure that risks are placed within a proper perspective and context, rather than looking at them in isolation. We therefore must not look at the costs \u2013 be they economic, environmental, or public health \u2013 associated with an individual power plant in isolation, but rather the costs associated with it (and its alternatives) at a societal level (Figure 4). 
This would entail looking at the potential risks arising from the use of nuclear power and comparing these with the risks associated with not adopting nuclear power.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 7 - }, - "text": "Plant-level production costs at market prices Grid-level costs of the electricity system Social and environmental costs of emissions, land-use, climate change, security of supply, etc. ", - "type": "Image" - }, - { - "element_id": "a4c9c9c3c416898e4ba9df7c3a3e31ac", - "metadata": { + "page_number": 7, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "Image", + "element_id": "dc83c2d2395c30a8785a2533424f1c72", + "text": "Plant-level production costs at market prices Grid-level costs of the electricity system Social and environmental costs of emissions, land-use, climate change, security of supply, etc. ", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 7 - }, - "text": "Figure 4. 
The different levels of cost associated with electricity generationx", - "type": "FigureCaption" - }, - { - "element_id": "4d355be258d489a707dfd634ea0f8810", - "metadata": { + "page_number": 7, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "FigureCaption", + "element_id": "a4c9c9c3c416898e4ba9df7c3a3e31ac", + "text": "Figure 4. The different levels of cost associated with electricity generationx", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 7 - }, - "text": "A more holistic regulatory process would be required, in which regulators move away from being siloed, looking at specific risks in isolation, with little regard for the greater picture. 
The move towards an all-hazard, holistic approach would require greater coordination between regulators, ensuring that the combined risks of a specific nuclear project are weighed against the risks posed by not advancing said project.", - "type": "NarrativeText" - }, - { - "element_id": "9c79edb1b3d13f851f56559d7d3046a8", - "metadata": { + "page_number": 7, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "4d355be258d489a707dfd634ea0f8810", + "text": "A more holistic regulatory process would be required, in which regulators move away from being siloed, looking at specific risks in isolation, with little regard for the greater picture. The move towards an all-hazard, holistic approach would require greater coordination between regulators, ensuring that the combined risks of a specific nuclear project are weighed against the risks posed by not advancing said project.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 7 - }, - "text": "Equally, the adoption of an all-hazards approach means regulators should consider declaring when a risk is too low to be a public health concern, in line with what the U.S. Nuclear Regulatory Commission attempted to do with its Below Regulatory Concern policy statements in the 1980s and early 1990s. 
In the context of nuclear power, this means departing from the notion that LNT instils of no safe level of radiation, and adopting a regulatory framework which notes the impossibility of eradicating risks. Failing to do so will result in excessive regulation that continues to limit the full potential of nuclear power in tackling climate change and sees a continued reliance on objectively more harmful energy sources.", - "type": "NarrativeText" - }, - { - "element_id": "4fa08c2ba2738ea62be41b466b794caa", - "metadata": { + "page_number": 7, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "9c79edb1b3d13f851f56559d7d3046a8", + "text": "Equally, the adoption of an all-hazards approach means regulators should consider declaring when a risk is too low to be a public health concern, in line with what the U.S. Nuclear Regulatory Commission attempted to do with its Below Regulatory Concern policy statements in the 1980s and early 1990s. In the context of nuclear power, this means departing from the notion that LNT instils of no safe level of radiation, and adopting a regulatory framework which notes the impossibility of eradicating risks. 
Failing to do so will result in excessive regulation that continues to limit the full potential of nuclear power in tackling climate change and sees a continued reliance on objectively more harmful energy sources.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 7 - }, - "text": "5", - "type": "Header" - }, - { - "element_id": "eeae015ec524f1b1bb0f7ac376a2090b", - "metadata": { + "page_number": 7, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "Header", + "element_id": "4fa08c2ba2738ea62be41b466b794caa", + "text": "5", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 8 - }, - "text": "6", - "type": "Header" - }, - { - "element_id": "b9eaa26361e53e6e430494de5febf1b1", - "metadata": { + "page_number": 7, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + 
}, + { + "type": "Header", + "element_id": "eeae015ec524f1b1bb0f7ac376a2090b", + "text": "6", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 8 - }, - "text": "Recalibrating the risk conversation", - "type": "Title" - }, - { - "element_id": "d4b3fedebdff5b1fbae81fce93b964b2", - "metadata": { + "page_number": 8, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "Title", + "element_id": "b9eaa26361e53e6e430494de5febf1b1", + "text": "Recalibrating the risk conversation", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 8 - }, - "text": "By looking at radiation risks in isolation, we have created something akin to a “radiation phobia”, that both directly and indirectly harms people around the world. For instance, it is well established that the vast majority of health impacts from Chernobyl and Fukushima Daiichi were not radiological, but rather psychosocial. 
There has been an observable and dramatic increase in depression, PTSD, substance abuse, and suicides following these events, which can be significantly attributed to the dissonance between the actual and perceived risks of radiation, and the stigmatization they caused.", - "type": "NarrativeText" - }, - { - "element_id": "f71d9a617fd81ca5b4e4fd8070b56548", - "metadata": { + "page_number": 8, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "d4b3fedebdff5b1fbae81fce93b964b2", + "text": "By looking at radiation risks in isolation, we have created something akin to a \u201cradiation phobia\u201d, that both directly and indirectly harms people around the world. For instance, it is well established that the vast majority of health impacts from Chernobyl and Fukushima Daiichi were not radiological, but rather psychosocial. There has been an observable and dramatic increase in depression, PTSD, substance abuse, and suicides following these events, which can be significantly attributed to the dissonance between the actual and perceived risks of radiation, and the stigmatization they caused.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 8 - }, - "text": "Similarly, many of the tremendous challenges the global community faces are significantly driven by this “radiation phobia”. 
Indeed, several of these issues have been considerably exacerbated by the fact that certain risks are given a disproportionate amount of focus, whereas others are de facto ignored. The global conversation around climate change is a prime example of this. The historical use of fossil fuels has contributed significantly to climate change through greenhouse gas emissions, causing unprecedented changes in the liveability of the Earth. By 2025, half of the world’s population will be living in water-stressed areas, as extreme heat and droughts are exacerbating water resources. Between 2030 and 2050, climate change is expected to be the cause of an additional 250,000 deaths per year, arising from malnutrition, malaria, diarrhoea and heat stressx. Yet, despite the huge risks associated with climate change, our addiction to coal, oil, and fossil gas remains, with fossil fuels providing 84% of global primary energy in 2019xii. The continued prioritization of fossil fuels at the expense of nuclear energy results in a considerable increase in the risks posed by climate change.", - "type": "NarrativeText" - }, - { - "element_id": "482fb91e0e9646a1ad97fe9881ff6053", - "metadata": { + "page_number": 8, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "f71d9a617fd81ca5b4e4fd8070b56548", + "text": "Similarly, many of the tremendous challenges the global community faces are 
significantly driven by this \u201cradiation phobia\u201d. Indeed, several of these issues have been considerably exacerbated by the fact that certain risks are given a disproportionate amount of focus, whereas others are de facto ignored. The global conversation around climate change is a prime example of this. The historical use of fossil fuels has contributed significantly to climate change through greenhouse gas emissions, causing unprecedented changes in the liveability of the Earth. By 2025, half of the world\u2019s population will be living in water-stressed areas, as extreme heat and droughts are exacerbating water resources. Between 2030 and 2050, climate change is expected to be the cause of an additional 250,000 deaths per year, arising from malnutrition, malaria, diarrhoea and heat stressx. Yet, despite the huge risks associated with climate change, our addiction to coal, oil, and fossil gas remains, with fossil fuels providing 84% of global primary energy in 2019xii. The continued prioritization of fossil fuels at the expense of nuclear energy results in a considerable increase in the risks posed by climate change.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 8 - }, - "text": "", - "type": "Image" - }, - { - "element_id": "b0197950e1af5c2aac10f5b67d61524a", - "metadata": { + "page_number": 8, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + 
"type": "Image", + "element_id": "482fb91e0e9646a1ad97fe9881ff6053", + "text": "", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 8 - }, - "text": "", - "type": "Image" - }, - { - "element_id": "34d2dd4af420ea3fdddc8fc5d581cac2", - "metadata": { + "page_number": 8, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "Image", + "element_id": "b0197950e1af5c2aac10f5b67d61524a", + "text": "", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 8 - }, - "text": "", - "type": "Image" - }, - { - "element_id": "a8ac039aa1d77ac96ecd4c8c14a556d5", - "metadata": { + "page_number": 8, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "Image", + "element_id": "34d2dd4af420ea3fdddc8fc5d581cac2", + "text": "", + "metadata": { "filetype": "application/pdf", 
"languages": [ "eng" ], - "page_number": 8 - }, - "text": "Equally, it is well established that living without access to electricity results in illness and death around the world, caused by everything from not having access to modern healthcare to household air pollution. As of today, 770 million people around the world do not have access to electricity, with over 75% of that population living in Sub-Saharan Africa. The world's poorest 4 billion people consume a mere 5% of the energy used in developed economies, and we need to find ways of delivering reliable electricity to the entire human population in a fashion that is sustainable. Household and ambient air pollution causes 8.7 million deaths each year, largely because of the continued use of fossil fuels. Widespread electrification is a key tool for delivering a just energy transition. Investment in nuclear, has become an urgent necessity. Discarding it, based on risk perceptions divorced from science, would be to abandon the moral obligation to ensure affordable, reliable, and sustainable energy for every community around the world.", - "type": "FigureCaption" - }, - { - "element_id": "c15d25fbb31fbdcee08859ae84555ead", - "metadata": { + "page_number": 8, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "FigureCaption", + "element_id": "a8ac039aa1d77ac96ecd4c8c14a556d5", + "text": "Equally, it is well established that living without 
access to electricity results in illness and death around the world, caused by everything from not having access to modern healthcare to household air pollution. As of today, 770 million people around the world do not have access to electricity, with over 75% of that population living in Sub-Saharan Africa. The world's poorest 4 billion people consume a mere 5% of the energy used in developed economies, and we need to find ways of delivering reliable electricity to the entire human population in a fashion that is sustainable. Household and ambient air pollution causes 8.7 million deaths each year, largely because of the continued use of fossil fuels. Widespread electrification is a key tool for delivering a just energy transition. Investment in nuclear, has become an urgent necessity. Discarding it, based on risk perceptions divorced from science, would be to abandon the moral obligation to ensure affordable, reliable, and sustainable energy for every community around the world.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 9 - }, - "text": "", - "type": "Image" - }, - { - "element_id": "868eb20ec9d28b0559f28d7b22f4b3af", - "metadata": { + "page_number": 8, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "Image", + "element_id": "c15d25fbb31fbdcee08859ae84555ead", + "text": "", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" 
], - "page_number": 9 - }, - "text": "Clearly, we have reached a point where we must establish a new conversation about the relative risks of using nuclear, especially when risks created by other energy sources are considered. We cannot address many of the global challenges we face without a significant increase in the use of nuclear energy. The detrimental effects of decades of looking at nuclear risks in isolation highlights just how crucial it is that regulators and policymakers change the way they view nuclear energy, and transition towards an all-hazards approach, ensuring that actions taken to mitigate risks do not result in creating more severe risks.", - "type": "NarrativeText" - }, - { - "element_id": "2b6ea4cb3cdceb2bb69833c7fecedca7", - "metadata": { + "page_number": 9, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "868eb20ec9d28b0559f28d7b22f4b3af", + "text": "Clearly, we have reached a point where we must establish a new conversation about the relative risks of using nuclear, especially when risks created by other energy sources are considered. We cannot address many of the global challenges we face without a significant increase in the use of nuclear energy. 
The detrimental effects of decades of looking at nuclear risks in isolation highlights just how crucial it is that regulators and policymakers change the way they view nuclear energy, and transition towards an all-hazards approach, ensuring that actions taken to mitigate risks do not result in creating more severe risks.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 9 - }, - "text": "We must begin to holistically look at the severity of the consequences of maintaining the current energy production system, many of which are irreversible. The ways in which we address climate change and other issues of global importance must be sustainable and not create new hazards down the line. The reality is that nuclear has always been and remains an exceptionally safe source of energy, representing the lowest risk, the most sustainable, and the most affordable ways to generate around-the-clock electricity.", - "type": "NarrativeText" - }, - { - "element_id": "a9903d695f11dc8e87d1a35cfd2673c0", - "metadata": { + "page_number": 9, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "2b6ea4cb3cdceb2bb69833c7fecedca7", + "text": "We must begin to holistically look at the severity of the consequences of maintaining the current energy production system, many of which are irreversible. 
The ways in which we address climate change and other issues of global importance must be sustainable and not create new hazards down the line. The reality is that nuclear has always been and remains an exceptionally safe source of energy, representing the lowest risk, the most sustainable, and the most affordable ways to generate around-the-clock electricity.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 9 - }, - "text": "Therefore, World Nuclear Association calls upon policymakers and regulators to adopt an all-hazards approach, where different risks associated with energy producing technologies are placed in perspective and the appropriate context, and examined in line with the latest scientific evidence. Policymakers and regulators must ensure that their decisions regarding radiation protection do not create greater risks elsewhere. This include the recalibration of existing regulations regarding nuclear power and radiation, weighing the cost of regulatory measures against the societal benefits provided by nuclear energy.", - "type": "NarrativeText" - }, - { - "element_id": "c48fc44c56f9a43d02304448e8b717ad", - "metadata": { + "page_number": 9, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "a9903d695f11dc8e87d1a35cfd2673c0", + "text": "Therefore, World Nuclear Association calls upon policymakers and 
regulators to adopt an all-hazards approach, where different risks associated with energy producing technologies are placed in perspective and the appropriate context, and examined in line with the latest scientific evidence. Policymakers and regulators must ensure that their decisions regarding radiation protection do not create greater risks elsewhere. This include the recalibration of existing regulations regarding nuclear power and radiation, weighing the cost of regulatory measures against the societal benefits provided by nuclear energy.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 9 - }, - "text": "", - "type": "Image" - }, - { - "element_id": "81e1362c9dfb3e04603ba076f6384c1b", - "metadata": { + "page_number": 9, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "Image", + "element_id": "c48fc44c56f9a43d02304448e8b717ad", + "text": "", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 9 - }, - "text": "7", - "type": "Footer" - }, - { - "element_id": "254ec884050f824b4524f53a2693f685", - "metadata": { + "page_number": 9, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": 
"utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "Footer", + "element_id": "81e1362c9dfb3e04603ba076f6384c1b", + "text": "7", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 10 - }, - "text": "8", - "type": "Header" - }, - { - "element_id": "3d819f053bf67ec228cf8c23aca02ac7", - "metadata": { + "page_number": 9, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "Header", + "element_id": "254ec884050f824b4524f53a2693f685", + "text": "8", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 10 - }, - "text": "References", - "type": "Title" - }, - { - "element_id": "59f05d231c2357ab111ee31b0da3c25d", - "metadata": { + "page_number": 10, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": 
"s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "Title", + "element_id": "3d819f053bf67ec228cf8c23aca02ac7", + "text": "References", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 10 - }, - "text": "World Health Organization (2020). Road traffic injuries. Available at: https://www.who.int/news-room/fact-sheets/ detail/road-traffic-injuries", - "type": "ListItem" - }, - { - "element_id": "a95a2add68d668b944cc332c88ea721e", - "metadata": { + "page_number": 10, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "ListItem", + "element_id": "59f05d231c2357ab111ee31b0da3c25d", + "text": "World Health Organization (2020). Road traffic injuries. 
Available at: https://www.who.int/news-room/fact-sheets/ detail/road-traffic-injuries", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 10 - }, - "text": "i", - "type": "Title" - }, - { - "element_id": "a3be0ee530629ee7a2413f05eb0cce76", - "metadata": { + "page_number": 10, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "Title", + "element_id": "a95a2add68d668b944cc332c88ea721e", + "text": "i", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 10 - }, - "text": "BBC (2020). Plane crash fatalities fell more than 50% in 2019. 
Available at: https://www.bbc.co.uk/news/ business-50953712", - "type": "ListItem" - }, - { - "element_id": "1eed7cbdb8d1de7a460e08217dd4c5c4", - "metadata": { + "page_number": 10, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "ListItem", + "element_id": "a3be0ee530629ee7a2413f05eb0cce76", + "text": "BBC (2020). Plane crash fatalities fell more than 50% in 2019. Available at: https://www.bbc.co.uk/news/ business-50953712", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 10 - }, - "text": "Slovic, P., 2010. The Psychology of risk. Saúde e Sociedade, 19(4), pp. 
731-747.", - "type": "ListItem" - }, - { - "element_id": "2c1956254dae0592c70f445f8c8973c3", - "metadata": { + "page_number": 10, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "ListItem", + "element_id": "1eed7cbdb8d1de7a460e08217dd4c5c4", + "text": "Slovic, P., 2010. The Psychology of risk. Sa\u00fade e Sociedade, 19(4), pp. 731-747.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 10 - }, - "text": "iv United Nations Scientific Committee on the Effects of Radiation (2016). Report of the United Nations Scientific Committee on the Effects of Atomic Radiation. 
Accessed from: https://www.unscear.org/docs/publications/2016/ UNSCEAR_2016_GA-Report-CORR.pdf", - "type": "ListItem" - }, - { - "element_id": "4ca71e69090af4ad16216a0ddcc0a168", - "metadata": { + "page_number": 10, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "ListItem", + "element_id": "2c1956254dae0592c70f445f8c8973c3", + "text": "iv United Nations Scientific Committee on the Effects of Radiation (2016). Report of the United Nations Scientific Committee on the Effects of Atomic Radiation. Accessed from: https://www.unscear.org/docs/publications/2016/ UNSCEAR_2016_GA-Report-CORR.pdf", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 10 - }, - "text": "International Energy Agency (2020). Global share of total energy supply by source, 2018. Key World Energy Statistics 2020. 
Available at: https://www.iea.org/data-and-statistics/charts/global-share-of-total-energy-supply-by- source-2018", - "type": "ListItem" - }, - { - "element_id": "16bb7fca4ab44ffc73e847ea7b93fc4d", - "metadata": { + "page_number": 10, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "ListItem", + "element_id": "4ca71e69090af4ad16216a0ddcc0a168", + "text": "International Energy Agency (2020). Global share of total energy supply by source, 2018. Key World Energy Statistics 2020. Available at: https://www.iea.org/data-and-statistics/charts/global-share-of-total-energy-supply-by- source-2018", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 10 - }, - "text": "Vohra, K., Vodonos, A., Schwartz, J., Marais, E., Sulprizio, M., & Mickley, L. (2021). Global mortality from outdoor fine particle pollution generated by fossil fuel combustion: Results from GEOS-Chem. Environmental Research, 195, p. 
1-8", - "type": "ListItem" - }, - { - "element_id": "0754024aff3f7b1e2850b689e180fc7f", - "metadata": { + "page_number": 10, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "ListItem", + "element_id": "16bb7fca4ab44ffc73e847ea7b93fc4d", + "text": "Vohra, K., Vodonos, A., Schwartz, J., Marais, E., Sulprizio, M., & Mickley, L. (2021). Global mortality from outdoor fine particle pollution generated by fossil fuel combustion: Results from GEOS-Chem. Environmental Research, 195, p. 1-8", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 10 - }, - "text": "vii World Health Organization. (2016). Updated tables 2016 for ‘Preventing disease through health environments: a global assessment of the burden of disease from environmental risks’. 
Available at: https://www.who.int/data/gho/ data/themes/public-health-and-environment [Accessed on 8 April 2021]", - "type": "ListItem" - }, - { - "element_id": "84953aabe78d95937c2bde298a8655c7", - "metadata": { + "page_number": 10, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "ListItem", + "element_id": "0754024aff3f7b1e2850b689e180fc7f", + "text": "vii World Health Organization. (2016). Updated tables 2016 for \u2018Preventing disease through health environments: a global assessment of the burden of disease from environmental risks\u2019. Available at: https://www.who.int/data/gho/ data/themes/public-health-and-environment [Accessed on 8 April 2021]", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 10 - }, - "text": "viii National Cancer Institute (2020). Cancer statistics. 
Available at: https://www.cancer.gov/about-cancer/ understanding/statistics", - "type": "ListItem" - }, - { - "element_id": "1f4fb7cb0ed3eb56bc1b0c4a36c5885a", - "metadata": { + "page_number": 10, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "ListItem", + "element_id": "84953aabe78d95937c2bde298a8655c7", + "text": "viii National Cancer Institute (2020). Cancer statistics. Available at: https://www.cancer.gov/about-cancer/ understanding/statistics", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 10 - }, - "text": "ix Cancer Research UK (n.d.). Cancer risk statistics. 
Available at: https://www.cancerresearchuk.org/health- professional/cancer-statistics/risk", - "type": "ListItem" - }, - { - "element_id": "eae4cdbbf622c9a2885b4585f9c8f115", - "metadata": { + "page_number": 10, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "ListItem", + "element_id": "1f4fb7cb0ed3eb56bc1b0c4a36c5885a", + "text": "ix Cancer Research UK (n.d.). Cancer risk statistics. Available at: https://www.cancerresearchuk.org/health- professional/cancer-statistics/risk", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 10 - }, - "text": "x OECD-NEA (2019). The Full Costs of Electricity Provision. 
Available at: https://www.oecd-nea.org/jcms/pl_14998/ the-full-costs-of-electricity-provision?details=true", - "type": "ListItem" - }, - { - "element_id": "04bb02d84ab3e401a668eb44fd8d676c", - "metadata": { + "page_number": 10, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "ListItem", + "element_id": "eae4cdbbf622c9a2885b4585f9c8f115", + "text": "x OECD-NEA (2019). The Full Costs of Electricity Provision. Available at: https://www.oecd-nea.org/jcms/pl_14998/ the-full-costs-of-electricity-provision?details=true", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 10 - }, - "text": "xi World Health Organization (2018). Climate change and health. 
Available at: https://www.who.int/news-room/fact- sheets/detail/climate-change-and-health", - "type": "ListItem" - }, - { - "element_id": "2e6ca48a0339d0bc72e7cc00dd922b7f", - "metadata": { + "page_number": 10, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "ListItem", + "element_id": "04bb02d84ab3e401a668eb44fd8d676c", + "text": "xi World Health Organization (2018). Climate change and health. Available at: https://www.who.int/news-room/fact- sheets/detail/climate-change-and-health", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 10 - }, - "text": "xii BP, 2020. 
BP Statistical Review of World Energy, London: BP.", - "type": "ListItem" - }, - { - "element_id": "e7dbdcca2a70adc904aab19790ef9d50", - "metadata": { + "page_number": 10, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "ListItem", + "element_id": "2e6ca48a0339d0bc72e7cc00dd922b7f", + "text": "xii BP, 2020. BP Statistical Review of World Energy, London: BP.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 10 - }, - "text": "Photo credits: Front cover & pages 1, 4, 6 left, 7 bottom: Adobe Stock; page 6 right: Getty Images; page 7 top: Uniper.", - "type": "NarrativeText" - }, - { - "element_id": "48dae8ac77cc653df8fe46ca706c1931", - "metadata": { + "page_number": 10, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": 
"e7dbdcca2a70adc904aab19790ef9d50", + "text": "Photo credits: Front cover & pages 1, 4, 6 left, 7 bottom: Adobe Stock; page 6 right: Getty Images; page 7 top: Uniper.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 12 - }, - "text": "World Nuclear Association Tower House 10 Southampton Street London WC2E 7HA United Kingdom", - "type": "NarrativeText" - }, - { - "element_id": "7ac9948c40759d5a7c1a2dbe870ca322", - "metadata": { + "page_number": 10, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "48dae8ac77cc653df8fe46ca706c1931", + "text": "World Nuclear Association Tower House 10 Southampton Street London WC2E 7HA United Kingdom", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 12 - }, - "text": "+44 (0)20 7451 1520 www.world-nuclear.org info@world-nuclear.org", - "type": "NarrativeText" - }, - { - "element_id": "f133df66727c328c27d3b8184204f864", - "metadata": { + "page_number": 12, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - 
"url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "7ac9948c40759d5a7c1a2dbe870ca322", + "text": "+44 (0)20 7451 1520 www.world-nuclear.org info@world-nuclear.org", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 12 - }, - "text": "World Nuclear Association is the international organization that represents the global nuclear industry. Its mission is to promote a wider understanding of nuclear energy among key international influencers by producing authoritative information, developing common industry positions, and contributing to the energy debate.", - "type": "NarrativeText" - }, - { - "element_id": "5b875f4e450a92d95cbcf868d76ab04e", - "metadata": { + "page_number": 12, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "f133df66727c328c27d3b8184204f864", + "text": "World Nuclear Association is the international organization that represents the global nuclear industry. 
Its mission is to promote a wider understanding of nuclear energy among key international influencers by producing authoritative information, developing common industry positions, and contributing to the energy debate.", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 12 - }, - "text": "contributing to the energy debate.", - "type": "Image" + "page_number": 12, + "data_source": { + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", + "record_locator": { + "protocol": "s3", + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" + }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } }, { - "element_id": "dfaec74062af8b1e913079272ee13f49", + "type": "Image", + "element_id": "5b875f4e450a92d95cbcf868d76ab04e", + "text": "contributing to the energy debate.", "metadata": { + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 12, "data_source": { - "date_modified": "2023-02-12T10:09:32+00:00", + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", "record_locator": { "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" }, - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", - "version": "306475068461766865312866697521104206816" - }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } + }, + { + "type": "NarrativeText", + "element_id": "dfaec74062af8b1e913079272ee13f49", + "text": "Recalibrating risk \u00a9 2021 World Nuclear Association. 
Registered in England and Wales, company number 01215741", + "metadata": { "filetype": "application/pdf", "languages": [ "eng" ], - "page_number": 12 - }, - "text": "Recalibrating risk © 2021 World Nuclear Association. Registered in England and Wales, company number 01215741", - "type": "NarrativeText" + "page_number": 12, + "data_source": { + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "e690f37ef36368a509d150f373a0bbe0", + "record_locator": { + "protocol": "s3", + "remote_file_path": "s3://utic-dev-tech-fixtures/small-pdf-set/" + }, + "date_created": "1676196572.0", + "date_modified": "1676196572.0" + } + } } ] \ No newline at end of file diff --git a/test_unstructured_ingest/src/against-api.sh b/test_unstructured_ingest/src/against-api.sh index b682c6d6a1..63ab033dfa 100755 --- a/test_unstructured_ingest/src/against-api.sh +++ b/test_unstructured_ingest/src/against-api.sh @@ -36,7 +36,6 @@ PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ --strategy hi_res \ --chunking-strategy by_page \ --chunk-max-characters 10000 \ - --pdf-infer-table-structure \ --reprocess \ --output-dir "$OUTPUT_DIR" \ --verbose \ diff --git a/test_unstructured_ingest/src/local-single-file-with-pdf-infer-table-structure.sh b/test_unstructured_ingest/src/local-single-file-with-pdf-infer-table-structure.sh index 7618a25be4..4265d0c4f5 100755 --- a/test_unstructured_ingest/src/local-single-file-with-pdf-infer-table-structure.sh +++ b/test_unstructured_ingest/src/local-single-file-with-pdf-infer-table-structure.sh @@ -27,7 +27,6 @@ PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ --metadata-exclude coordinates,filename,file_directory,metadata.data_source.date_created,metadata.data_source.date_modified,metadata.data_source.date_processed,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ --output-dir "$OUTPUT_DIR" \ --skip-infer-table-types "xls,xlsx" \ - --pdf-infer-table-structure \ --strategy hi_res \ 
--verbose \ --reprocess \ diff --git a/test_unstructured_ingest/src/s3-minio.sh b/test_unstructured_ingest/src/s3-minio.sh index ba264f2c77..c6011be057 100755 --- a/test_unstructured_ingest/src/s3-minio.sh +++ b/test_unstructured_ingest/src/s3-minio.sh @@ -38,7 +38,7 @@ AWS_SECRET_ACCESS_KEY=$secret_key AWS_ACCESS_KEY_ID=$access_key \ s3 \ --num-processes "$max_processes" \ --download-dir "$DOWNLOAD_DIR" \ - --metadata-exclude coordinates,filename,file_directory,metadata.data_source.date_processed,metadata.data_source.date_modified,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ + --metadata-exclude coordinates,filename,file_directory,metadata.data_source.date_processed,metadata.data_source.date_modified,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth,metadata.data_source.date_created \ --strategy hi_res \ --preserve-downloads \ --reprocess \ diff --git a/unstructured/__version__.py b/unstructured/__version__.py index 7244fcbcaf..82e0bd644f 100644 --- a/unstructured/__version__.py +++ b/unstructured/__version__.py @@ -1 +1 @@ -__version__ = "0.14.1-dev0" # pragma: no cover +__version__ = "0.14.1-dev1" # pragma: no cover diff --git a/unstructured/ingest/README.md b/unstructured/ingest/README.md index dc6fb96cd3..3072a1d5c4 100644 --- a/unstructured/ingest/README.md +++ b/unstructured/ingest/README.md @@ -1,4 +1,5 @@ -# Batch Processing Documents +# Batch Processing Documents [DEPRECATED] +For the latest approach, go to: [v2](./v2) ## The unstructured-ingest CLI diff --git a/unstructured/ingest/__init__.py b/unstructured/ingest/__init__.py index e69de29bb2..9d48db4f9f 100644 --- a/unstructured/ingest/__init__.py +++ b/unstructured/ingest/__init__.py @@ -0,0 +1 @@ +from __future__ import annotations diff --git a/unstructured/ingest/cli/cli.py b/unstructured/ingest/cli/cli.py index c325f13010..fa7c3008eb 100644 --- a/unstructured/ingest/cli/cli.py +++ 
b/unstructured/ingest/cli/cli.py @@ -1,6 +1,8 @@ import click from unstructured.ingest.cli import dest, src +from unstructured.ingest.v2.cli.cmds import dest as dest_v2 +from unstructured.ingest.v2.cli.cmds import src as src_v2 @click.group() @@ -15,10 +17,16 @@ def get_cmd() -> click.Command: to the main command as nested subcommands. """ cmd = ingest + src_dict = {s.name: s for s in src} + dest_dict = {d.name: d for d in dest} + for s in src_v2: + src_dict[s.name] = s + for d in dest_v2: + dest_dict[d.name] = d # Add all subcommands - for src_subcommand in src: + for src_subcommand in src_dict.values(): # Add all destination subcommands - for dest_subcommand in dest: + for dest_subcommand in dest_dict.values(): src_subcommand.add_command(dest_subcommand) cmd.add_command(src_subcommand) return cmd diff --git a/unstructured/ingest/v2/README.md b/unstructured/ingest/v2/README.md new file mode 100644 index 0000000000..458c76e2f7 --- /dev/null +++ b/unstructured/ingest/v2/README.md @@ -0,0 +1,122 @@ +# Batch Processing Documents + +## The unstructured-ingest CLI + +The unstructured library includes a CLI to batch ingest documents from various sources, storing structured outputs locally on the filesystem. + +For example, the following command processes all the documents in S3 in the +`utic-dev-tech-fixtures` bucket with a prefix of `small-pdf-set/`. + + unstructured-ingest \ + s3 \ + --remote-url s3://utic-dev-tech-fixtures/small-pdf-set/ \ + --anonymous \ + --output-dir s3-small-batch-output \ + --num-processes 2 + +Naturally, --num-processes may be adjusted for better instance utilization with multiprocessing. 
There is also an optional + +Installation note: make sure to install the following extras when installing unstructured, needed for the above command: + + pip install "unstructured[s3,local-inference]" + +See the [Quick Start](https://github.com/Unstructured-IO/unstructured#eight_pointed_black_star-quick-start), which documents how to pip install `detectron2` and other OS dependencies, necessary for the parsing of .PDF files. + +# Developers' Guide + +## Local testing + +When testing from a local checkout rather than a pip-installed version of `unstructured`, +just execute `unstructured/ingest/v2/main.py`, e.g.: + + PYTHONPATH=. ./unstructured/ingest/v2/main.py \ + s3 \ + --remote-url s3://utic-dev-tech-fixtures/small-pdf-set/ \ + --anonymous \ + --output-dir s3-small-batch-output \ + --num-processes 2 + +## Adding Source Data Connectors + +To add a source connector, refer to [local.py](unstructured/ingest/v2/processes/connectors/local.py) as an example that implements the two relevant abstract base classes with their associated configs. + +If the connector has an available `fsspec` implementation, then refer to [s3.py](unstructured/ingest/v2/processes/connectors/fsspec/s3.py). + +Make sure to update the source registry via `add_source_entry` using a unique key for the source type. This will expose it as an available connector. + + +Create at least one folder under [examples/ingest](examples/ingest) with an easily reproducible +script that shows the new connector in action. + +Finally, to ensure the connector remains stable, add a new script test_unstructured_ingest/test-ingest-\<connector-name\>.sh similar to [test_unstructured_ingest/test-ingest-s3.sh](test_unstructured_ingest/test-ingest-s3.sh), and append a line invoking the new script in [test_unstructured_ingest/test-ingest.sh](test_unstructured_ingest/test-ingest.sh). + +You'll notice that the unstructured outputs for the new documents are expected +to be checked into CI under test_unstructured_ingest/expected-structured-output/\<connector-name\>.
So, you'll need to `git add` those JSON outputs so that `test-ingest.sh` passes in CI. + +## Adding Destination Data Connectors + +To add a destination connector, refer to [local.py](unstructured/ingest/v2/processes/connectors/local.py) as an example that implements the uploader abstract base class with its associated configs. + +If the connector has an available `fsspec` implementation, then refer to [s3.py](unstructured/ingest/v2/processes/connectors/fsspec/s3.py). + +Make sure to update the destination registry via `add_destination_entry` using a unique key for the destination type. This will expose it as an available connector. + +Tests and examples should be added to demonstrate/validate the use of the destination connector, following the steps laid out for a source connector. + +### The checklist: + +In checklist form, the above steps are summarized as: + +- [ ] Create a new file under [connectors/](unstructured/ingest/v2/processes/connectors/) implementing the base classes required, depending on whether it's a new source or destination connector. + - [ ] If the IngestDoc relies on a connection or session that could be reused, the subclass of `BaseConnectorConfig` implements a session handle to manage connections. The ConnectorConfig subclass should also inherit from `ConfigSessionHandleMixin` and the IngestDoc subclass should also inherit from `IngestDocSessionHandleMixin`. Check [here](https://github.com/Unstructured-IO/unstructured/pull/1058/files#diff-dae96d30f58cffe1b348c036d006b48bdc7e2e47fbd7c8ec1c45d63face1542d) for a detailed example. + - [ ] Indexer should fetch appropriate metadata from the source that can be used to reference the doc in the pipeline and detect if there are any changes from what might already exist locally. + - [ ] Add the relevant decorators from `unstructured.ingest.error` on top of relevant methods to handle errors such as a source connection error, destination connection error, or a partition error.
+ - [ ] Register the required information via `add_source_entry` or `add_destination_entry` to expose the new connectors. +- [ ] Update the CLI to expose the new connectors via CLI params + - [ ] Add a new file under [cmds](unstructured/ingest/v2/cli/cmds) + - [ ] Add the command base classes from the file above in the [__init__.py](unstructured/ingest/v2/cli/cmds/__init__.py). This will expose it in the CLI. +- [ ] Update [unstructured/ingest/cli](unstructured/ingest/cli) with support for the new connector. +- [ ] Create a folder under [examples/ingest](examples/ingest) that includes at least one well-documented script. +- [ ] Add a script test_unstructured_ingest/[src|dest]/test-ingest-\<connector-name\>.sh. Its JSON output files should total no more than 100K. +- [ ] `git add` the expected outputs under test_unstructured_ingest/expected-structured-output/\<connector-name\> so the above test passes in CI. +- [ ] Add a line to [test_unstructured_ingest/test-ingest.sh](test_unstructured_ingest/test-ingest.sh) invoking the new test script. +- [ ] Make sure the tests for the connector are running and not skipped by reviewing the logs in CI. +- [ ] If additional python dependencies are needed for the new connector: + - [ ] Add them as an extra to [setup.py](unstructured/setup.py). + - [ ] Update the Makefile, adding a target for `install-ingest-<connector-name>` and adding another `pip-compile` line to the `pip-compile` make target. See [this commit](https://github.com/Unstructured-IO/unstructured/commit/ab542ca3c6274f96b431142262d47d727f309e37) for a reference. + - [ ] The added dependencies should be imported at runtime when the new connector is invoked, rather than as top-level imports. + - [ ] Add the decorator `unstructured.utils.requires_dependencies` on top of each class instance or function that uses those connector-specific dependencies e.g.
for `GitHubConnector` it should look like `@requires_dependencies(dependencies=["github"], extras="github")` + - [ ] Run `make tidy` and `make check` to ensure linting checks pass. +- [ ] Update ingest documentation [here](https://github.com/Unstructured-IO/docs) +- [ ] For team members that are developing in the original repository: + - [ ] If there are secret variables created for the connector tests, make sure to: + - [ ] add the secrets into GitHub (contact someone with access) + - [ ] include the secret variables in [`ci.yml`](https://github.com/Unstructured-IO/unstructured/blob/main/.github/workflows/ci.yml) and [`ingest-test-fixtures-update-pr.yml`](https://github.com/Unstructured-IO/unstructured/blob/main/.github/workflows/ingest-test-fixtures-update-pr.yml) + - [ ] add a make install line in the workflow configurations to be able to provide the workflow machine with the required dependencies on the connector while testing + - [ ] Whenever necessary, use the [ingest update test fixtures](https://github.com/Unstructured-IO/unstructured/actions/workflows/ingest-test-fixtures-update-pr.yml) workflow to update the test fixtures. + +## Design References + +The ingest flow is similar to an ETL pipeline that gets defined at runtime based on user input: + +![unstructured ingest diagram](assets/pipeline.png) + + + +### Steps +* `Indexer`: This is responsible for reaching out to the source location and pulling in metadata for each document that will need to be downloaded and processed. +* `Downloader`: Using the information generated from the indexer, download the content as files on the local file system for processing. This may require manipulation of the data to prep it for partitioning. +* `Uncompressor`: If enabled, this will look for any supported compressed files (tar and zip are currently supported) and expand those. +* `Partitioner`: Generates the structured, enriched content from the local files that have been pulled down.
Both local and API-based partitioning are supported, with API-based partitioning set to run async while local partitioning is set to run via multiprocessing. +* `Chunker`: Optionally chunk the partitioned content. Can also be run locally or via the API, with async/multiprocessing set in the same approach as the partitioner. +* `Embedder`: Create vector embeddings for each element in the structured output. Most of these are via an API call (e.g. AWS Bedrock) and therefore run async, but a local Hugging Face model can also be used, which will run via multiprocessing. +* `Stager`: This is an optional step that won't apply for most pipelines. It is used when the data needs to be modified from the existing structure to better support the upload, such as converting it to a csv for tabular-based destinations (sql dbs). +* `Uploader`: Write the local content to the destination. If none is provided, the local one will be used, which writes the final result to a location on the local filesystem. If batch uploads are needed, this will run in a single process with access to all docs. If batch is not supported, all docs can be uploaded concurrently using the async approach. + + +### Sequence Diagram +![unstructured ingest sequence diagram](assets/sequence.png) + + +### Parallel Execution/Concurrency +For each step in the pipeline, a choice can be made whether to run it async to support IO-heavy tasks or via multiprocessing for CPU-heavy loads.
diff --git a/unstructured/ingest/v2/__init__.py b/unstructured/ingest/v2/__init__.py new file mode 100644 index 0000000000..9d48db4f9f --- /dev/null +++ b/unstructured/ingest/v2/__init__.py @@ -0,0 +1 @@ +from __future__ import annotations diff --git a/unstructured/ingest/v2/assets/pipeline.excalidraw b/unstructured/ingest/v2/assets/pipeline.excalidraw new file mode 100644 index 0000000000..d59bc99dd5 --- /dev/null +++ b/unstructured/ingest/v2/assets/pipeline.excalidraw @@ -0,0 +1,1417 @@ +{ + "type": "excalidraw", + "version": 2, + "source": "https://excalidraw.com", + "elements": [ + { + "id": "Y3a1yUDvwFK9AB6KmSl9a", + "type": "rectangle", + "x": 637.48046875, + "y": 239.11328125, + "width": 322.44921875, + "height": 97.015625, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffec99", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b1a", + "roundness": { + "type": 3 + }, + "seed": 2131406971, + "version": 139, + "versionNonce": 1482689781, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "7paHS6cDsoMgh1vsOhizN" + }, + { + "id": "e6DNVpQ-gH7v6WNDWWSPD", + "type": "arrow" + } + ], + "updated": 1715951675553, + "link": null, + "locked": false + }, + { + "id": "7paHS6cDsoMgh1vsOhizN", + "type": "text", + "x": 759.9351119995117, + "y": 275.12109375, + "width": 77.53993225097656, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b1b", + "roundness": null, + "seed": 860081397, + "version": 12, + "versionNonce": 1588840341, + "isDeleted": false, + "boundElements": null, + "updated": 1715951674833, + "link": null, + "locked": false, + "text": "Indexing", + "fontSize": 20, + "fontFamily": 1, + "textAlign": "center", + 
"verticalAlign": "middle", + "containerId": "Y3a1yUDvwFK9AB6KmSl9a", + "originalText": "Indexing", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "type": "rectangle", + "version": 205, + "versionNonce": 1999066491, + "index": "b1c", + "isDeleted": false, + "id": "LZrKOvKX6nGWVOrEpPaPS", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 637.244140625, + "y": 406.7421875, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffec99", + "width": 322.44921875, + "height": 97.015625, + "seed": 882087163, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 3 + }, + "boundElements": [ + { + "type": "text", + "id": "SjYgGO3cAHPreH7mJVBdm" + }, + { + "id": "e6DNVpQ-gH7v6WNDWWSPD", + "type": "arrow" + }, + { + "id": "Dn6kngn7QXyxmlCbzgO2R", + "type": "arrow" + } + ], + "updated": 1715951678396, + "link": null, + "locked": false + }, + { + "type": "text", + "version": 88, + "versionNonce": 1992691451, + "index": "b1d", + "isDeleted": false, + "id": "SjYgGO3cAHPreH7mJVBdm", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 741.9687957763672, + "y": 442.75, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 112.99990844726562, + "height": 25, + "seed": 820854171, + "groupIds": [], + "frameId": null, + "roundness": null, + "boundElements": [], + "updated": 1715951530614, + "link": null, + "locked": false, + "fontSize": 20, + "fontFamily": 1, + "text": "Downloading", + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "LZrKOvKX6nGWVOrEpPaPS", + "originalText": "Downloading", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "type": "rectangle", + "version": 252, + "versionNonce": 1617745173, + "index": "b1e", + "isDeleted": false, + "id": "62UjU0YjVR7TvLe7hLQCV", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "dotted", + "roughness": 1, + "opacity": 
100, + "angle": 0, + "x": 644.884765625, + "y": 586.75390625, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffec99", + "width": 322.44921875, + "height": 97.015625, + "seed": 1549110491, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 3 + }, + "boundElements": [ + { + "type": "text", + "id": "vRabBFX0KOEkJ6d4rZF5D" + }, + { + "id": "Dn6kngn7QXyxmlCbzgO2R", + "type": "arrow" + }, + { + "id": "0Q1io01It2PX9ESFiW49G", + "type": "arrow" + } + ], + "updated": 1715951680142, + "link": null, + "locked": false + }, + { + "type": "text", + "version": 146, + "versionNonce": 1440901275, + "index": "b1f", + "isDeleted": false, + "id": "vRabBFX0KOEkJ6d4rZF5D", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 739.9794387817383, + "y": 622.76171875, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 132.25987243652344, + "height": 25, + "seed": 560281979, + "groupIds": [], + "frameId": null, + "roundness": null, + "boundElements": [], + "updated": 1715951539363, + "link": null, + "locked": false, + "fontSize": 20, + "fontFamily": 1, + "text": "Uncompressing", + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "62UjU0YjVR7TvLe7hLQCV", + "originalText": "Uncompressing", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "type": "rectangle", + "version": 329, + "versionNonce": 1236647227, + "index": "b1g", + "isDeleted": false, + "id": "GZLTgdXXsgXo-4rDdd7BN", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 642.740234375, + "y": 752.87109375, + "strokeColor": "#1e1e1e", + "backgroundColor": "#a5d8ff", + "width": 322.44921875, + "height": 97.015625, + "seed": 857787003, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 3 + }, + "boundElements": [ + { + "type": "text", + "id": "3nbrNuxDWK3BIkJVVUKYs" + }, + { + "id": "0Q1io01It2PX9ESFiW49G", + 
"type": "arrow" + }, + { + "id": "5rxlnALV4R8RNKSSzjawZ", + "type": "arrow" + } + ], + "updated": 1715951692576, + "link": null, + "locked": false + }, + { + "type": "text", + "version": 237, + "versionNonce": 1218981717, + "index": "b1h", + "isDeleted": false, + "id": "3nbrNuxDWK3BIkJVVUKYs", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 748.6249008178711, + "y": 788.87890625, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 110.67988586425781, + "height": 25, + "seed": 590856987, + "groupIds": [], + "frameId": null, + "roundness": null, + "boundElements": [], + "updated": 1715951571504, + "link": null, + "locked": false, + "fontSize": 20, + "fontFamily": 1, + "text": "Partitioning", + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "GZLTgdXXsgXo-4rDdd7BN", + "originalText": "Partitioning", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "type": "rectangle", + "version": 425, + "versionNonce": 1862353237, + "index": "b1i", + "isDeleted": false, + "id": "JGKFyGpX1KS2mJhIpFiBT", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "dotted", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 642.431640625, + "y": 916.02734375, + "strokeColor": "#1e1e1e", + "backgroundColor": "#eebefa", + "width": 322.44921875, + "height": 97.015625, + "seed": 1945073307, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 3 + }, + "boundElements": [ + { + "type": "text", + "id": "mPevqaKIOyvM1_XLXsPLZ" + }, + { + "id": "5rxlnALV4R8RNKSSzjawZ", + "type": "arrow" + }, + { + "id": "xsN-wlmdU5K7UGi95CYsI", + "type": "arrow" + } + ], + "updated": 1715951696070, + "link": null, + "locked": false + }, + { + "type": "text", + "version": 340, + "versionNonce": 937753339, + "index": "b1j", + "isDeleted": false, + "id": "mPevqaKIOyvM1_XLXsPLZ", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + 
"opacity": 100, + "angle": 0, + "x": 765.1862869262695, + "y": 952.03515625, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 76.93992614746094, + "height": 25, + "seed": 161213243, + "groupIds": [], + "frameId": null, + "roundness": null, + "boundElements": [], + "updated": 1715951559401, + "link": null, + "locked": false, + "fontSize": 20, + "fontFamily": 1, + "text": "Chunking", + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "JGKFyGpX1KS2mJhIpFiBT", + "originalText": "Chunking", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "type": "rectangle", + "version": 527, + "versionNonce": 1327555355, + "index": "b1k", + "isDeleted": false, + "id": "7SOrKIkV23-VpsfKkBWnF", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "dotted", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 641.716796875, + "y": 1079.15234375, + "strokeColor": "#1e1e1e", + "backgroundColor": "#eebefa", + "width": 322.44921875, + "height": 97.015625, + "seed": 1437476219, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 3 + }, + "boundElements": [ + { + "type": "text", + "id": "-UFDNMIXOpAYsEf9ubpNz" + }, + { + "id": "xsN-wlmdU5K7UGi95CYsI", + "type": "arrow" + }, + { + "id": "foUafDsehtG66kl3x246k", + "type": "arrow" + } + ], + "updated": 1715951698569, + "link": null, + "locked": false + }, + { + "type": "text", + "version": 451, + "versionNonce": 1228878331, + "index": "b1l", + "isDeleted": false, + "id": "-UFDNMIXOpAYsEf9ubpNz", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 756.0714492797852, + "y": 1115.16015625, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 93.73991394042969, + "height": 25, + "seed": 1633795611, + "groupIds": [], + "frameId": null, + "roundness": null, + "boundElements": [], + "updated": 1715951569483, + "link": null, + "locked": false, + "fontSize": 20, + "fontFamily": 1, + 
"text": "Embedding", + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "7SOrKIkV23-VpsfKkBWnF", + "originalText": "Embedding", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "type": "rectangle", + "version": 421, + "versionNonce": 1862165339, + "index": "b1m", + "isDeleted": false, + "id": "JncRqJ0FdwNeHFO0WQj7j", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "dotted", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 641.271484375, + "y": 1250.0859375, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffc9c9", + "width": 322.44921875, + "height": 97.015625, + "seed": 207501755, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 3 + }, + "boundElements": [ + { + "type": "text", + "id": "4aD6_9mkOZYxvLuujjZJ3" + }, + { + "id": "foUafDsehtG66kl3x246k", + "type": "arrow" + }, + { + "id": "bZvxt2MfEmkgYplJGYvAF", + "type": "arrow" + } + ], + "updated": 1715951685444, + "link": null, + "locked": false + }, + { + "type": "text", + "version": 335, + "versionNonce": 1654728507, + "index": "b1n", + "isDeleted": false, + "id": "4aD6_9mkOZYxvLuujjZJ3", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 767.2161254882812, + "y": 1286.09375, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 70.5599365234375, + "height": 25, + "seed": 696601179, + "groupIds": [], + "frameId": null, + "roundness": null, + "boundElements": [], + "updated": 1715951578801, + "link": null, + "locked": false, + "fontSize": 20, + "fontFamily": 1, + "text": "Staging", + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "JncRqJ0FdwNeHFO0WQj7j", + "originalText": "Staging", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "type": "rectangle", + "version": 405, + "versionNonce": 2565851, + "index": "b1o", + "isDeleted": false, + "id": "YZqdS6HqxV0eCvZhb-1TG", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": 
"solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 637.533203125, + "y": 1406.921875, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffc9c9", + "width": 322.44921875, + "height": 97.015625, + "seed": 586095477, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 3 + }, + "boundElements": [ + { + "type": "text", + "id": "X0wnY-7I3y5NxPAIay-cU" + }, + { + "id": "bZvxt2MfEmkgYplJGYvAF", + "type": "arrow" + } + ], + "updated": 1715952782049, + "link": null, + "locked": false + }, + { + "type": "text", + "version": 327, + "versionNonce": 236892981, + "index": "b1p", + "isDeleted": false, + "id": "X0wnY-7I3y5NxPAIay-cU", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 754.2878494262695, + "y": 1442.9296875, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 88.93992614746094, + "height": 25, + "seed": 1170597077, + "groupIds": [], + "frameId": null, + "roundness": null, + "boundElements": [], + "updated": 1715952784484, + "link": null, + "locked": false, + "fontSize": 20, + "fontFamily": 1, + "text": "Uploading", + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "YZqdS6HqxV0eCvZhb-1TG", + "originalText": "Uploading", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "e6DNVpQ-gH7v6WNDWWSPD", + "type": "arrow", + "x": 792.36328125, + "y": 344.94140625, + "width": 0, + "height": 56.38671875, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b1r", + "roundness": { + "type": 2 + }, + "seed": 1826370165, + "version": 50, + "versionNonce": 1269906229, + "isDeleted": false, + "boundElements": null, + "updated": 1715951643784, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 0, + 56.38671875 + ] + ], + 
"lastCommittedPoint": null, + "startBinding": { + "elementId": "Y3a1yUDvwFK9AB6KmSl9a", + "focus": 0.03933516663234279, + "gap": 8.8125 + }, + "endBinding": { + "elementId": "LZrKOvKX6nGWVOrEpPaPS", + "focus": -0.037869335045489234, + "gap": 5.4140625 + }, + "startArrowhead": null, + "endArrowhead": "arrow" + }, + { + "id": "Dn6kngn7QXyxmlCbzgO2R", + "type": "arrow", + "x": 796.0859375, + "y": 512.30078125, + "width": 0, + "height": 62.3828125, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b1s", + "roundness": { + "type": 2 + }, + "seed": 414059669, + "version": 60, + "versionNonce": 138024373, + "isDeleted": false, + "boundElements": null, + "updated": 1715951647788, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 0, + 62.3828125 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "LZrKOvKX6nGWVOrEpPaPS", + "focus": 0.014779458974887034, + "gap": 8.54296875 + }, + "endBinding": { + "elementId": "62UjU0YjVR7TvLe7hLQCV", + "focus": -0.06217064217960677, + "gap": 12.0703125 + }, + "startArrowhead": null, + "endArrowhead": "arrow" + }, + { + "id": "0Q1io01It2PX9ESFiW49G", + "type": "arrow", + "x": 796.01953125, + "y": 695.125, + "width": 0, + "height": 47.18359375, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b1t", + "roundness": { + "type": 2 + }, + "seed": 2076044405, + "version": 53, + "versionNonce": 518155253, + "isDeleted": false, + "boundElements": null, + "updated": 1715951652693, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 0, + 47.18359375 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": 
"62UjU0YjVR7TvLe7hLQCV", + "focus": 0.06258252874120199, + "gap": 11.35546875 + }, + "endBinding": { + "elementId": "GZLTgdXXsgXo-4rDdd7BN", + "focus": -0.049281015663803655, + "gap": 10.5625 + }, + "startArrowhead": null, + "endArrowhead": "arrow" + }, + { + "id": "5rxlnALV4R8RNKSSzjawZ", + "type": "arrow", + "x": 796.625, + "y": 862.3984375, + "width": 0, + "height": 40.19921875, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b1u", + "roundness": { + "type": 2 + }, + "seed": 343257781, + "version": 31, + "versionNonce": 60053493, + "isDeleted": false, + "boundElements": null, + "updated": 1715951657891, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 0, + 40.19921875 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "GZLTgdXXsgXo-4rDdd7BN", + "focus": 0.04552557936690613, + "gap": 12.51171875 + }, + "endBinding": { + "elementId": "JGKFyGpX1KS2mJhIpFiBT", + "focus": -0.0436115182865519, + "gap": 13.4296875 + }, + "startArrowhead": null, + "endArrowhead": "arrow" + }, + { + "id": "xsN-wlmdU5K7UGi95CYsI", + "type": "arrow", + "x": 795.421875, + "y": 1024.8828125, + "width": 0, + "height": 39.421875, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b1v", + "roundness": { + "type": 2 + }, + "seed": 1318887093, + "version": 38, + "versionNonce": 303905173, + "isDeleted": false, + "boundElements": null, + "updated": 1715951661064, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 0, + 39.421875 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "JGKFyGpX1KS2mJhIpFiBT", + "focus": 0.05107393363780634, + "gap": 
11.83984375 + }, + "endBinding": { + "elementId": "7SOrKIkV23-VpsfKkBWnF", + "focus": -0.04664009594534023, + "gap": 14.84765625 + }, + "startArrowhead": null, + "endArrowhead": "arrow" + }, + { + "id": "foUafDsehtG66kl3x246k", + "type": "arrow", + "x": 792.3203125, + "y": 1187.8671875, + "width": 0, + "height": 44.78515625, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b1w", + "roundness": { + "type": 2 + }, + "seed": 1280415829, + "version": 34, + "versionNonce": 1235268021, + "isDeleted": false, + "boundElements": null, + "updated": 1715951664610, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 0, + 44.78515625 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "7SOrKIkV23-VpsfKkBWnF", + "focus": 0.06587762123396368, + "gap": 11.69921875 + }, + "endBinding": { + "elementId": "JncRqJ0FdwNeHFO0WQj7j", + "focus": -0.06311555840914873, + "gap": 17.43359375 + }, + "startArrowhead": null, + "endArrowhead": "arrow" + }, + { + "id": "bZvxt2MfEmkgYplJGYvAF", + "type": "arrow", + "x": 789.81640625, + "y": 1358.8125, + "width": 0.08602962445024787, + "height": 35.25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b1x", + "roundness": { + "type": 2 + }, + "seed": 288196725, + "version": 41, + "versionNonce": 714813627, + "isDeleted": false, + "boundElements": null, + "updated": 1715952782050, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 0.08602962445024787, + 35.25 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "JncRqJ0FdwNeHFO0WQj7j", + "focus": 0.07864610464341526, + "gap": 11.7109375 + }, + "endBinding": 
{ + "elementId": "YZqdS6HqxV0eCvZhb-1TG", + "focus": -0.05395713956897283, + "gap": 12.859375 + }, + "startArrowhead": null, + "endArrowhead": "arrow" + }, + { + "id": "u-6rLKVGZ91K-do_X6_7h", + "type": "rectangle", + "x": 1014.77734375, + "y": 243.0625, + "width": 22.22265625, + "height": 22.22265625, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffec99", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b1y", + "roundness": { + "type": 3 + }, + "seed": 643949941, + "version": 184, + "versionNonce": 115789461, + "isDeleted": false, + "boundElements": null, + "updated": 1715951856984, + "link": null, + "locked": false + }, + { + "id": "i8TMmsB--w6DYXWYRe_qm", + "type": "text", + "x": 1059.00390625, + "y": 242.80859375, + "width": 758.3992919921875, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffec99", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b20", + "roundness": null, + "seed": 2000384187, + "version": 169, + "versionNonce": 848966645, + "isDeleted": false, + "boundElements": null, + "updated": 1715951856984, + "link": null, + "locked": false, + "text": "Steps associated with getting data from a source and ready for processing", + "fontSize": 20, + "fontFamily": 1, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "Steps associated with getting data from a source and ready for processing", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "type": "rectangle", + "version": 271, + "versionNonce": 1366945109, + "index": "b21", + "isDeleted": false, + "id": "UMttgjHgvnZXjUlDiqbaB", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 1015.4722290039062, + "y": 297.1875, + 
"strokeColor": "#1e1e1e", + "backgroundColor": "#a5d8ff", + "width": 22.22265625, + "height": 22.22265625, + "seed": 2058850293, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 3 + }, + "boundElements": [], + "updated": 1715951856984, + "link": null, + "locked": false + }, + { + "type": "text", + "version": 298, + "versionNonce": 1658550965, + "index": "b22", + "isDeleted": false, + "id": "hf4pKQ55184WTVhdPC92w", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 1059.6987915039062, + "y": 296.93359375, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffec99", + "width": 365.3796691894531, + "height": 25, + "seed": 1703659861, + "groupIds": [], + "frameId": null, + "roundness": null, + "boundElements": [], + "updated": 1715951856984, + "link": null, + "locked": false, + "fontSize": 20, + "fontFamily": 1, + "text": "Creating structured/enriched content", + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "Creating structured/enriched content", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "type": "rectangle", + "version": 269, + "versionNonce": 1600412693, + "index": "b23", + "isDeleted": false, + "id": "N4kjMAQ-BqLtvUxn3gpN_", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 1017.2026977539062, + "y": 354.03125, + "strokeColor": "#1e1e1e", + "backgroundColor": "#eebefa", + "width": 22.22265625, + "height": 22.22265625, + "seed": 548622613, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 3 + }, + "boundElements": [], + "updated": 1715951856984, + "link": null, + "locked": false + }, + { + "type": "text", + "version": 292, + "versionNonce": 252318069, + "index": "b24", + "isDeleted": false, + "id": "VZCSNlIntRGixA1659IRA", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + 
"angle": 0, + "x": 1061.4292602539062, + "y": 353.77734375, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffec99", + "width": 367.4396667480469, + "height": 25, + "seed": 347235957, + "groupIds": [], + "frameId": null, + "roundness": null, + "boundElements": [], + "updated": 1715951856984, + "link": null, + "locked": false, + "fontSize": 20, + "fontFamily": 1, + "text": "Reformatting the structured content", + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "Reformatting the structured content", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "type": "rectangle", + "version": 249, + "versionNonce": 521280213, + "index": "b25", + "isDeleted": false, + "id": "-mFRWLXO9Tam2O1loV1l8", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 1017.7183227539062, + "y": 410.453125, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffc9c9", + "width": 22.22265625, + "height": 22.22265625, + "seed": 1321641467, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 3 + }, + "boundElements": [], + "updated": 1715951856984, + "link": null, + "locked": false + }, + { + "type": "text", + "version": 299, + "versionNonce": 2014443573, + "index": "b26", + "isDeleted": false, + "id": "l8FTa1uhh3FXC4DdeCjJX", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 1061.9448852539062, + "y": 410.19921875, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffc9c9", + "width": 652.2393798828125, + "height": 25, + "seed": 345386651, + "groupIds": [], + "frameId": null, + "roundness": null, + "boundElements": [], + "updated": 1715951856984, + "link": null, + "locked": false, + "fontSize": 20, + "fontFamily": 1, + "text": "Steps associated with uploading the final result to a destination", + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "Steps 
associated with uploading the final result to a destination", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "type": "rectangle", + "version": 358, + "versionNonce": 998367509, + "index": "b27", + "isDeleted": false, + "id": "3uQWJDRthA7AWVdHSokLt", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 1018.3490600585938, + "y": 538.45703125, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 22.22265625, + "height": 22.22265625, + "seed": 1078125621, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 3 + }, + "boundElements": [], + "updated": 1715952831362, + "link": null, + "locked": false + }, + { + "type": "text", + "version": 418, + "versionNonce": 2035692411, + "index": "b28", + "isDeleted": false, + "id": "4iycrxYTvkePRrwE9d55_", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 1062.5756225585938, + "y": 538.203125, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffc9c9", + "width": 135.0398712158203, + "height": 25, + "seed": 1059231125, + "groupIds": [], + "frameId": null, + "roundness": null, + "boundElements": [], + "updated": 1715952836177, + "link": null, + "locked": false, + "fontSize": 20, + "fontFamily": 1, + "text": "Required step", + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "Required step", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "type": "rectangle", + "version": 409, + "versionNonce": 1303811067, + "index": "b2B", + "isDeleted": false, + "id": "Jr-S8g5xKeXX4hA1S9VNt", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "dotted", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 1019.7730331420898, + "y": 589.04296875, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 22.22265625, + "height": 22.22265625, + "seed": 832846773, + "groupIds": [], + 
"frameId": null, + "roundness": { + "type": 3 + }, + "boundElements": [], + "updated": 1715952853068, + "link": null, + "locked": false + }, + { + "type": "text", + "version": 481, + "versionNonce": 989351029, + "index": "b2C", + "isDeleted": false, + "id": "23iPs-E6gExYad4eWTKFP", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 1063.9995956420898, + "y": 588.7890625, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffc9c9", + "width": 133.33987426757812, + "height": 25, + "seed": 963443989, + "groupIds": [], + "frameId": null, + "roundness": null, + "boundElements": [], + "updated": 1715952857188, + "link": null, + "locked": false, + "fontSize": 20, + "fontFamily": 1, + "text": "Optional Step", + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "Optional Step", + "autoResize": true, + "lineHeight": 1.25 + } + ], + "appState": { + "gridSize": null, + "viewBackgroundColor": "#ffffff" + }, + "files": {} +} \ No newline at end of file diff --git a/unstructured/ingest/v2/assets/pipeline.png b/unstructured/ingest/v2/assets/pipeline.png new file mode 100644 index 0000000000..9cfcf64e80 Binary files /dev/null and b/unstructured/ingest/v2/assets/pipeline.png differ diff --git a/unstructured/ingest/v2/assets/sequence.png b/unstructured/ingest/v2/assets/sequence.png new file mode 100644 index 0000000000..6b79db305c Binary files /dev/null and b/unstructured/ingest/v2/assets/sequence.png differ diff --git a/unstructured/ingest/v2/assets/sequence.txt b/unstructured/ingest/v2/assets/sequence.txt new file mode 100644 index 0000000000..618859a6af --- /dev/null +++ b/unstructured/ingest/v2/assets/sequence.txt @@ -0,0 +1,38 @@ +title Ingest Flow + + +Pipeline->Index: Pipeline.indexer_step.run() +Index->Data Provider:fetch list of docs with metadata +Data Provider->Index: +Index->Local Filesystem:for each record, save the metadata as a json file +Index->Pipeline: 
pipeline records a list of files +Pipeline->Download: Pipeline.downloader_step(records) +Download->Local Filesystem: Fetch the associated metadata +Local Filesystem->Download: +Download->Data Provider: Get raw data from data provider +Download->Local Filesystem: Persist the data as raw files +Download->Pipeline: Send back a reference to the local file to process +Pipeline-->Uncompress: Optionally run if flag set to True +Uncompress->Local Filesystem: Extract tar and zip files +Uncompress->Local Filesystem: New metadata records are created for new extracted files +Uncompress->Pipeline: Send back list of pointers to new metadata files +Pipeline->Partition: Pipeline.partitioner_step(downloaded_data) +Partition-->Unstructured Api: If credentials passed in,\npass file data to API for partitioning +Unstructured Api->Partition: +Partition->Local Filesystem: Persist results +Partition->Pipeline: Pointers to persisted results +Pipeline-->Chunk: Optionally Pipeline.chunker_step.run(records) +Chunk-->Unstructured Api: If credentials passed in,\npass file data to API for chunking +Unstructured Api->Chunk: +Chunk->Local Filesystem: Persist results +Chunk->Pipeline: Pointers to persisted results +Pipeline-->Embed: Optionally Pipeline.embed_step.run(records) +Embed-->Embedder Api: Depending on which embedder\nis chosen, make API calls to provider +Embed->Local Filesystem: Persist results +Embed->Pipeline: Pointers to persisted results +Pipeline->Stage: Optionally Pipeline.stager_step.run(records) +Stage->Local Filesystem: manipulate the records to better upload +Stage->Pipeline: Pointers to persisted results +Pipeline->Upload: Pipeline.upload_step.run() +Upload->Data Destination: +Pipeline->Local Filesystem: Cleanup diff --git a/unstructured/ingest/v2/cli/README.md b/unstructured/ingest/v2/cli/README.md new file mode 100644 index 0000000000..4d60d4ccf1 --- /dev/null +++ b/unstructured/ingest/v2/cli/README.md @@ -0,0 +1,28 @@ +# Ingest CLI +This package helps map user input
via a cli to the underlying ingest code to run a small ETL pipeline. + +## Design Reference +[cli.py](./cli.py) is the main entrypoint to run the cli itself. The key point here is the interaction between all +source and destination connectors. + +To manually run the cli: +```shell +PYTHONPATH=. python unstructured/ingest/v2/main.py --help +``` + +The `main.py` file simply wraps the generated Click command created in `cli.py`. + +### Source Commands +All source commands are added as sub commands to the parent ingest Click group. This allows each command to map to +different connectors with shared and unique parameters. + +### Destination Commands +All destination commands are added as sub commands to each parent source command. This allows each invocation of the source +sub command to display all possible destination subcommands. The code in [utils.py](./utils.py) helps structure the +generated text from the Click library to be more intuitive on this approach (i.e. list sub commands as `Destinations`). + +### Configs +The configs in [configs/](./configs) and connector-specific ones in [cmds/](./cmds) help surface all user parameters that +are needed to marshal the input dictionary from Click into all the respective configs needed to create a full pipeline run. +Because Click returns a flat dictionary of user inputs, the `extract_config` method in `utils.py` helps deserialize this dictionary +into dataclasses that have nested fields (such as access configs).
diff --git a/unstructured/ingest/v2/cli/__init__.py b/unstructured/ingest/v2/cli/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/unstructured/ingest/v2/cli/base/__init__.py b/unstructured/ingest/v2/cli/base/__init__.py new file mode 100644 index 0000000000..ed07a1684d --- /dev/null +++ b/unstructured/ingest/v2/cli/base/__init__.py @@ -0,0 +1,4 @@ +from .dest import DestCmd +from .src import SrcCmd + +__all__ = ["SrcCmd", "DestCmd"] diff --git a/unstructured/ingest/v2/cli/base/cmd.py b/unstructured/ingest/v2/cli/base/cmd.py new file mode 100644 index 0000000000..76badac6c2 --- /dev/null +++ b/unstructured/ingest/v2/cli/base/cmd.py @@ -0,0 +1,171 @@ +from abc import ABC, abstractmethod +from dataclasses import dataclass, field +from typing import Any, Optional, Type, TypeVar + +import click + +from unstructured.ingest.v2.cli.interfaces import CliConfig +from unstructured.ingest.v2.cli.utils import extract_config +from unstructured.ingest.v2.interfaces import ProcessorConfig +from unstructured.ingest.v2.logger import logger +from unstructured.ingest.v2.pipeline.pipeline import Pipeline +from unstructured.ingest.v2.processes.chunker import Chunker, ChunkerConfig +from unstructured.ingest.v2.processes.connector_registry import ( + DownloaderT, + IndexerT, + UploaderT, + UploadStagerT, + destination_registry, + source_registry, +) +from unstructured.ingest.v2.processes.connectors.local import LocalUploader, LocalUploaderConfig +from unstructured.ingest.v2.processes.embedder import Embedder, EmbedderConfig +from unstructured.ingest.v2.processes.partitioner import Partitioner, PartitionerConfig + +CommandT = TypeVar("CommandT", bound=click.Command) + + +@dataclass +class BaseCmd(ABC): + cmd_name: str + default_configs: list[Type[CliConfig]] = field(default_factory=list) + + @property + def cmd_name_key(self): + return self.cmd_name.replace("-", "_") + + @abstractmethod + def cmd(self, ctx: click.Context, **options) -> None: + pass + + def 
add_options(self, cmd: CommandT, extras: list[Type[CliConfig]]) -> CommandT: + configs = self.default_configs + # make sure what's unique to this cmd appears first + extras.extend(configs) + for config in extras: + try: + config.add_cli_options(cmd=cmd) + except ValueError as e: + raise ValueError(f"failed to set configs from {config.__name__}: {e}") + return cmd + + def get_pipline( + self, + src: str, + source_options: dict[str, Any], + dest: Optional[str] = None, + destination_options: Optional[dict[str, Any]] = None, + ) -> Pipeline: + logger.debug( + f"creating pipeline from cli using source {src} with options: {source_options}" + ) + pipeline_kwargs: dict[str, Any] = { + "context": self.get_processor_config(options=source_options), + "downloader": self.get_downloader(src=src, options=source_options), + "indexer": self.get_indexer(src=src, options=source_options), + "partitioner": self.get_partitioner(options=source_options), + } + if chunker := self.get_chunker(options=source_options): + pipeline_kwargs["chunker"] = chunker + if embedder := self.get_embeder(options=source_options): + pipeline_kwargs["embedder"] = embedder + if dest: + logger.debug( + f"setting destination on pipeline {dest} with options: {destination_options}" + ) + if uploader_stager := self.get_upload_stager(dest=dest, options=destination_options): + pipeline_kwargs["upload_stager"] = uploader_stager + pipeline_kwargs["uploader"] = self.get_uploader(dest=dest, options=destination_options) + else: + # Default to local uploader + # TODO remove after v1 no longer supported + destination_options = destination_options or {} + if "output_dir" not in destination_options: + destination_options["output_dir"] = source_options["output_dir"] + pipeline_kwargs["uploader"] = self.get_default_uploader(options=destination_options) + return Pipeline(**pipeline_kwargs) + + @staticmethod + def get_default_uploader(options: dict[str, Any]) -> UploaderT: + uploader_config = extract_config(flat_data=options, 
config=LocalUploaderConfig) + return LocalUploader(upload_config=uploader_config) + + @staticmethod + def get_chunker(options: dict[str, Any]) -> Optional[Chunker]: + chunker_config = extract_config(flat_data=options, config=ChunkerConfig) + if not chunker_config.chunking_strategy: + return None + return Chunker(config=chunker_config) + + @staticmethod + def get_embeder(options: dict[str, Any]) -> Optional[Embedder]: + embedder_config = extract_config(flat_data=options, config=EmbedderConfig) + if not embedder_config.embedding_provider: + return None + return Embedder(config=embedder_config) + + @staticmethod + def get_partitioner(options: dict[str, Any]) -> Partitioner: + partitioner_config = extract_config(flat_data=options, config=PartitionerConfig) + return Partitioner(config=partitioner_config) + + @staticmethod + def get_processor_config(options: dict[str, Any]) -> ProcessorConfig: + return extract_config(flat_data=options, config=ProcessorConfig) + + @staticmethod + def get_indexer(src: str, options: dict[str, Any]) -> IndexerT: + source_entry = source_registry[src] + indexer_kwargs: dict[str, Any] = {} + if indexer_config_cls := source_entry.indexer_config: + indexer_kwargs["index_config"] = extract_config( + flat_data=options, config=indexer_config_cls + ) + if connection_config_cls := source_entry.connection_config: + indexer_kwargs["connection_config"] = extract_config( + flat_data=options, config=connection_config_cls + ) + indexer_cls = source_entry.indexer + return indexer_cls(**indexer_kwargs) + + @staticmethod + def get_downloader(src: str, options: dict[str, Any]) -> DownloaderT: + source_entry = source_registry[src] + downloader_kwargs: dict[str, Any] = {} + if downloader_config_cls := source_entry.downloader_config: + downloader_kwargs["download_config"] = extract_config( + flat_data=options, config=downloader_config_cls + ) + if connection_config_cls := source_entry.connection_config: + downloader_kwargs["connection_config"] = extract_config( + 
flat_data=options, config=connection_config_cls + ) + downloader_cls = source_entry.downloader + return downloader_cls(**downloader_kwargs) + + @staticmethod + def get_upload_stager(dest: str, options: dict[str, Any]) -> Optional[UploadStagerT]: + dest_entry = destination_registry[dest] + upload_stager_kwargs: dict[str, Any] = {} + if upload_stager_config_cls := dest_entry.upload_stager_config: + upload_stager_kwargs["config"] = extract_config( + flat_data=options, config=upload_stager_config_cls + ) + if upload_stager_cls := dest_entry.upload_stager: + return upload_stager_cls(**upload_stager_kwargs) + return None + + @staticmethod + def get_uploader(dest, options: dict[str, Any]) -> UploaderT: + dest_entry = destination_registry[dest] + uploader_kwargs: dict[str, Any] = {} + if uploader_config_cls := dest_entry.uploader_config: + uploader_kwargs["upload_config"] = extract_config( + flat_data=options, config=uploader_config_cls + ) + if connection_config_cls := dest_entry.connection_config: + uploader_kwargs["connection_config"] = extract_config( + flat_data=options, config=connection_config_cls + ) + uploader_cls = dest_entry.uploader + return uploader_cls(**uploader_kwargs) diff --git a/unstructured/ingest/v2/cli/base/dest.py b/unstructured/ingest/v2/cli/base/dest.py new file mode 100644 index 0000000000..5d12fb21d6 --- /dev/null +++ b/unstructured/ingest/v2/cli/base/dest.py @@ -0,0 +1,56 @@ +import logging +from dataclasses import dataclass +from typing import Optional, Type + +import click + +from unstructured.ingest.cli.utils import conform_click_options +from unstructured.ingest.v2.cli.base.cmd import BaseCmd +from unstructured.ingest.v2.cli.interfaces import CliConfig +from unstructured.ingest.v2.logger import logger + + +@dataclass +class DestCmd(BaseCmd): + connection_config: Optional[Type[CliConfig]] = None + uploader_config: Optional[Type[CliConfig]] = None + upload_stager_config: Optional[Type[CliConfig]] = None + + def cmd(self, ctx: click.Context, 
**options) -> None: + logger.setLevel(logging.DEBUG if options.get("verbose", False) else logging.INFO) + if not ctx.parent: + raise click.ClickException("destination command called without a parent") + if not ctx.parent.info_name: + raise click.ClickException("parent command missing info name") + source_cmd = ctx.parent.info_name.replace("-", "_") + source_options: dict = ctx.parent.params if ctx.parent else {} + conform_click_options(options) + try: + pipeline = self.get_pipline( + src=source_cmd, + source_options=source_options, + dest=self.cmd_name, + destination_options=options, + ) + pipeline.run() + except Exception as e: + logger.error(f"failed to run destination command {self.cmd_name}: {e}", exc_info=True) + raise click.ClickException(str(e)) from e + + def get_cmd(self) -> click.Command: + # Dynamically create the command without the use of click decorators + fn = self.cmd + fn = click.pass_context(fn) + cmd = click.command(fn) + if not isinstance(cmd, click.core.Command): + raise ValueError(f"generated command was not of expected type Command: {type(cmd)}") + cmd.name = self.cmd_name + cmd.short_help = "v2" + cmd.invoke_without_command = True + extras = [ + x + for x in [self.uploader_config, self.upload_stager_config, self.connection_config] + if x + ] + self.add_options(cmd, extras=extras) + return cmd diff --git a/unstructured/ingest/v2/cli/base/src.py b/unstructured/ingest/v2/cli/base/src.py new file mode 100644 index 0000000000..0aac121995 --- /dev/null +++ b/unstructured/ingest/v2/cli/base/src.py @@ -0,0 +1,70 @@ +import logging +from dataclasses import dataclass, field +from typing import Optional, Type + +import click + +from unstructured.ingest.cli.utils import Group, conform_click_options +from unstructured.ingest.v2.cli.base.cmd import BaseCmd +from unstructured.ingest.v2.cli.configs import ( + ChunkerCliConfig, + EmbedderCliConfig, + PartitionerCliConfig, + ProcessorCliConfig, +) +from unstructured.ingest.v2.cli.interfaces import CliConfig 
+from unstructured.ingest.v2.logger import logger + + +@dataclass +class SrcCmd(BaseCmd): + indexer_config: Optional[Type[CliConfig]] = None + downloader_config: Optional[Type[CliConfig]] = None + connection_config: Optional[Type[CliConfig]] = None + default_configs: list[CliConfig] = field( + default_factory=lambda: [ + ProcessorCliConfig, + PartitionerCliConfig, + EmbedderCliConfig, + ChunkerCliConfig, + ] + ) + + def cmd(self, ctx: click.Context, **options) -> None: + if ctx.invoked_subcommand: + return + + conform_click_options(options) + logger.setLevel(logging.DEBUG if options.get("verbose", False) else logging.INFO) + try: + pipeline = self.get_pipline(src=self.cmd_name, source_options=options) + pipeline.run() + except Exception as e: + logger.error(f"failed to run source command {self.cmd_name}: {e}", exc_info=True) + raise click.ClickException(str(e)) from e + + def get_cmd(self) -> click.Group: + # Dynamically create the command without the use of click decorators + fn = self.cmd + fn = click.pass_context(fn) + cmd = click.group(fn, cls=Group) + if not isinstance(cmd, click.core.Group): + raise ValueError(f"generated src command was not of expected type Group: {type(cmd)}") + cmd.name = self.cmd_name + cmd.short_help = "v2" + cmd.invoke_without_command = True + extras = [ + x for x in [self.indexer_config, self.downloader_config, self.connection_config] if x + ] + self.add_options(cmd, extras=extras) + + # TODO remove after v1 no longer supported + cmd.params.append( + click.Option( + ["--output-dir"], + required=False, + type=str, + help="Local path to write partitioned output to", + ) + ) + return cmd diff --git a/unstructured/ingest/v2/cli/cli.py b/unstructured/ingest/v2/cli/cli.py new file mode 100644 index 0000000000..a53c43565d --- /dev/null +++ b/unstructured/ingest/v2/cli/cli.py @@ -0,0 +1,24 @@ +import click + +from unstructured.ingest.v2.cli.cmds import dest, src + + +@click.group() +def ingest(): + pass + + +def get_cmd() -> click.Command: + 
"""Construct and return a Click command object representing the main command for the CLI. + + This function adds all dest_subcommand(s) to each src_subcommand, and adds all of those + to the main command as nested subcommands. + """ + cmd = ingest + # Add all subcommands + for src_subcommand in src: + # Add all destination subcommands + for dest_subcommand in dest: + src_subcommand.add_command(dest_subcommand) + cmd.add_command(src_subcommand) + return cmd diff --git a/unstructured/ingest/v2/cli/cmds/__init__.py b/unstructured/ingest/v2/cli/cmds/__init__.py new file mode 100644 index 0000000000..c80f504e0c --- /dev/null +++ b/unstructured/ingest/v2/cli/cmds/__init__.py @@ -0,0 +1,8 @@ +import click + +from .fsspec.s3 import s3_dest_cmd, s3_src_cmd +from .local import local_dest_cmd, local_src_cmd + +src: list[click.Group] = [v.get_cmd() for v in [local_src_cmd, s3_src_cmd]] + +dest: list[click.Command] = [v.get_cmd() for v in [local_dest_cmd, s3_dest_cmd]] diff --git a/unstructured/ingest/v2/cli/cmds/fsspec/__init__.py b/unstructured/ingest/v2/cli/cmds/fsspec/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/unstructured/ingest/v2/cli/cmds/fsspec/fsspec.py b/unstructured/ingest/v2/cli/cmds/fsspec/fsspec.py new file mode 100644 index 0000000000..8b7dc31cc3 --- /dev/null +++ b/unstructured/ingest/v2/cli/cmds/fsspec/fsspec.py @@ -0,0 +1,58 @@ +from dataclasses import dataclass + +import click + +from unstructured.ingest.v2.cli.interfaces import CliConfig +from unstructured.ingest.v2.cli.utils import DelimitedString + + +@dataclass +class FsspecCliDownloadConfig(CliConfig): + @staticmethod + def get_cli_options() -> list[click.Option]: + return [ + click.Option( + ["--download-dir"], + help="Where files are downloaded to, defaults to a location at" + "`$HOME/.cache/unstructured/ingest//`.", + ), + ] + + +@dataclass +class FsspecCliFileConfig(CliConfig): + @staticmethod + def get_cli_options() -> list[click.Option]: + return [ + click.Option( + 
["--remote-url"], + required=True, + help="Remote fsspec URL formatted as `protocol://dir/path`", + ) + ] + + +@dataclass +class FsspecCliIndexerConfig(FsspecCliFileConfig): + @staticmethod + def get_cli_options() -> list[click.Option]: + options = super(FsspecCliIndexerConfig, FsspecCliIndexerConfig).get_cli_options() + options.extend( + [ + click.Option( + ["--recursive"], + is_flag=True, + default=False, + help="Recursively download files in their respective folders " + "otherwise stop at the files in provided folder level.", + ), + click.Option( + ["--file-glob"], + default=None, + type=DelimitedString(), + help="A comma-separated list of file globs to limit which types of " + "local files are accepted, e.g. '*.html,*.txt'", + ), + ] + ) + return options diff --git a/unstructured/ingest/v2/cli/cmds/fsspec/s3.py b/unstructured/ingest/v2/cli/cmds/fsspec/s3.py new file mode 100644 index 0000000000..19da0d2bee --- /dev/null +++ b/unstructured/ingest/v2/cli/cmds/fsspec/s3.py @@ -0,0 +1,93 @@ +from dataclasses import dataclass + +import click + +from unstructured.ingest.v2.cli.base import DestCmd, SrcCmd +from unstructured.ingest.v2.cli.cmds.fsspec.fsspec import ( + FsspecCliDownloadConfig, + FsspecCliFileConfig, + FsspecCliIndexerConfig, +) +from unstructured.ingest.v2.cli.interfaces import CliConfig +from unstructured.ingest.v2.processes.connectors.fsspec.s3 import ( + CONNECTOR_TYPE, +) + + +@dataclass +class S3CliDownloadConfig(FsspecCliDownloadConfig): + pass + + +@dataclass +class S3CliIndexerConfig(FsspecCliIndexerConfig): + @staticmethod + def get_cli_options() -> list[click.Option]: + options = [ + click.Option( + ["--remote-url"], + required=True, + help="Remote fsspec URL formatted as `protocol://dir/path`", + ), + ] + return options + + +@dataclass +class S3CliConnectionConfig(CliConfig): + @staticmethod + def get_cli_options() -> list[click.Option]: + options = [ + click.Option( + ["--anonymous"], + is_flag=True, + default=False, + help="Connect to s3 
without local AWS credentials.", + ), + click.Option( + ["--endpoint-url"], + type=str, + default=None, + help="Use this endpoint_url, if specified. Needed for " + "connecting to non-AWS S3 buckets.", + ), + click.Option( + ["--key"], + type=str, + default=None, + help="If not anonymous, use this access key ID, if specified. Takes precedence " + "over `aws_access_key_id` in client_kwargs.", + ), + click.Option( + ["--secret"], + type=str, + default=None, + help="If not anonymous, use this secret access key, if specified.", + ), + click.Option( + ["--token"], + type=str, + default=None, + help="If not anonymous, use this security token, if specified.", + ), + ] + return options + + +@dataclass +class S3UploaderConfig(FsspecCliFileConfig): + pass + + +s3_src_cmd = SrcCmd( + cmd_name=CONNECTOR_TYPE, + indexer_config=S3CliIndexerConfig, + connection_config=S3CliConnectionConfig, + downloader_config=S3CliDownloadConfig, +) + +s3_dest_cmd = DestCmd( + cmd_name=CONNECTOR_TYPE, + connection_config=S3CliConnectionConfig, + uploader_config=S3UploaderConfig, +) diff --git a/unstructured/ingest/v2/cli/cmds/local.py b/unstructured/ingest/v2/cli/cmds/local.py new file mode 100644 index 0000000000..f9ab173085 --- /dev/null +++ b/unstructured/ingest/v2/cli/cmds/local.py @@ -0,0 +1,60 @@ +from dataclasses import dataclass + +import click + +from unstructured.ingest.v2.cli.base import DestCmd, SrcCmd +from unstructured.ingest.v2.cli.interfaces import CliConfig +from unstructured.ingest.v2.cli.utils import DelimitedString +from unstructured.ingest.v2.processes.connectors.local import CONNECTOR_TYPE + + +@dataclass +class LocalCliIndexerConfig(CliConfig): + @staticmethod + def get_cli_options() -> list[click.Option]: + options = [ + click.Option( + ["--input-path"], + required=True, + type=click.Path(file_okay=True, dir_okay=True, exists=True), + help="Path to the location in the local file system that will be processed.", + ), + click.Option( + ["--file-glob"], + default=None, + 
type=DelimitedString(), + help="A comma-separated list of file globs to limit which types of " + "local files are accepted, e.g. '*.html,*.txt'", + ), + click.Option( + ["--recursive"], + is_flag=True, + default=False, + help="Recursively download files in their respective folders " + "otherwise stop at the files in provided folder level.", + ), + ] + return options + + +@dataclass +class LocalCliUploaderConfig(CliConfig): + @staticmethod + def get_cli_options() -> list[click.Option]: + options = [ + click.Option( + ["--output-dir"], + required=True, + type=str, + help="Local path to write partitioned output to", + ) + ] + return options + + +local_src_cmd = SrcCmd( + cmd_name=CONNECTOR_TYPE, + indexer_config=LocalCliIndexerConfig, +) + +local_dest_cmd = DestCmd(cmd_name=CONNECTOR_TYPE, uploader_config=LocalCliUploaderConfig) diff --git a/unstructured/ingest/v2/cli/configs/__init__.py b/unstructured/ingest/v2/cli/configs/__init__.py new file mode 100644 index 0000000000..2b3a421923 --- /dev/null +++ b/unstructured/ingest/v2/cli/configs/__init__.py @@ -0,0 +1,6 @@ +from .chunk import ChunkerCliConfig +from .embed import EmbedderCliConfig +from .partition import PartitionerCliConfig +from .processor import ProcessorCliConfig + +__all__ = ["ChunkerCliConfig", "ProcessorCliConfig", "PartitionerCliConfig", "EmbedderCliConfig"] diff --git a/unstructured/ingest/v2/cli/configs/chunk.py b/unstructured/ingest/v2/cli/configs/chunk.py new file mode 100644 index 0000000000..b6f79641d7 --- /dev/null +++ b/unstructured/ingest/v2/cli/configs/chunk.py @@ -0,0 +1,89 @@ +from dataclasses import dataclass + +import click + +from unstructured.chunking import CHUNK_MAX_CHARS_DEFAULT, CHUNK_MULTI_PAGE_DEFAULT +from unstructured.ingest.v2.cli.interfaces import CliConfig + + +@dataclass +class ChunkerCliConfig(CliConfig): + @staticmethod + def get_cli_options() -> list[click.Option]: + options = [ + click.Option( + ["--chunking-strategy"], + type=str, + default=None, + help="The rule-set 
to use to form chunks. Omit to disable chunking.", + ), + click.Option( + ["--chunk-combine-text-under-n-chars"], + type=int, + help=( + "Combine consecutive chunks when the first does not exceed this length and" + " the second will fit without exceeding the hard-maximum length. Only" + " operative for 'by_title' chunking-strategy." + ), + ), + click.Option( + ["--chunk-include-orig-elements/--chunk-no-include-orig-elements"], + is_flag=True, + default=True, + help=( + "When chunking, add the original elements consolidated to form each chunk to" + " `.metadata.orig_elements` on that chunk." + ), + ), + click.Option( + ["--chunk-max-characters"], + type=int, + default=CHUNK_MAX_CHARS_DEFAULT, + show_default=True, + help=( + "Hard maximum chunk length. No chunk will exceed this length. An oversized" + " element will be divided by text-splitting to fit this window." + ), + ), + click.Option( + ["--chunk-multipage-sections/--chunk-no-multipage-sections"], + is_flag=True, + default=CHUNK_MULTI_PAGE_DEFAULT, + help=( + "Ignore page boundaries when chunking such that elements from two different" + " pages can appear in the same chunk. Only operative for 'by_title'" + " chunking-strategy." + ), + ), + click.Option( + ["--chunk-new-after-n-chars"], + type=int, + help=( + "Soft-maximum chunk length. Another element will not be added to a chunk of" + " this length even when it would fit without exceeding the hard-maximum" + " length." + ), + ), + click.Option( + ["--chunk-overlap"], + type=int, + default=0, + show_default=True, + help=( + "Prefix chunk text with last overlap=N characters of prior chunk. Only" + " applies to oversized chunks divided by text-splitting. To apply overlap to" + " non-oversized chunks use the --overlap-all option." + ), + ), + click.Option( + ["--chunk-overlap-all"], + is_flag=True, + default=False, + help=( + "Apply overlap to chunks formed from whole elements as well as those formed" + " by text-splitting oversized elements. 
Overlap length is take from --overlap" + " option value." + ), + ), + ] + return options diff --git a/unstructured/ingest/v2/cli/configs/embed.py b/unstructured/ingest/v2/cli/configs/embed.py new file mode 100644 index 0000000000..69f6bc6574 --- /dev/null +++ b/unstructured/ingest/v2/cli/configs/embed.py @@ -0,0 +1,74 @@ +from dataclasses import dataclass +from typing import Any + +import click +from dataclasses_json.core import Json + +from unstructured.embed import EMBEDDING_PROVIDER_TO_CLASS_MAP +from unstructured.ingest.v2.cli.interfaces import CliConfig + + +@dataclass +class EmbedderCliConfig(CliConfig): + @staticmethod + def get_cli_options() -> list[click.Option]: + options = [ + click.Option( + ["--embedding-provider"], + help="Type of the embedding class to be used. Can be one of: " + f"{list(EMBEDDING_PROVIDER_TO_CLASS_MAP)}", + type=click.Choice(list(EMBEDDING_PROVIDER_TO_CLASS_MAP)), + ), + click.Option( + ["--embedding-api-key"], + help="API key for the embedding model, for the case an API key is needed.", + type=str, + default=None, + ), + click.Option( + ["--embedding-model-name"], + help="Embedding model name, if needed. " + "Chooses a particular LLM between different options, to embed with it.", + type=str, + default=None, + ), + click.Option( + ["--embedding-aws-access-key-id"], + help="AWS access key used for AWS-based embedders, such as bedrock", + type=str, + default=None, + ), + click.Option( + ["--embedding-aws-secret-access-key"], + help="AWS secret key used for AWS-based embedders, such as bedrock", + type=str, + default=None, + ), + click.Option( + ["--embedding-aws-region"], + help="AWS region used for AWS-based embedders, such as bedrock", + type=str, + default="us-west-2", + ), + ] + return options + + @classmethod + def from_dict(cls, kvs: Json, **kwargs: Any): + """ + Extension of the dataclass from_dict() to avoid a naming conflict with other CLI params. 
+ This allows CLI arguments to be prepended with embedding_ during CLI invocation but + doesn't require that as part of the field names in this class + """ + if isinstance(kvs, dict): + new_kvs = { + k[len("embedding_") :]: v # noqa: E203 + for k, v in kvs.items() + if k.startswith("embedding_") + } + if len(new_kvs.keys()) == 0: + return None + if not new_kvs.get("provider"): + return None + return super().from_dict(new_kvs, **kwargs) + return super().from_dict(kvs, **kwargs) diff --git a/unstructured/ingest/v2/cli/configs/partition.py b/unstructured/ingest/v2/cli/configs/partition.py new file mode 100644 index 0000000000..ae21206357 --- /dev/null +++ b/unstructured/ingest/v2/cli/configs/partition.py @@ -0,0 +1,99 @@ +from dataclasses import dataclass + +import click + +from unstructured.ingest.cli.interfaces import DelimitedString, Dict +from unstructured.ingest.v2.cli.interfaces import CliConfig + + +@dataclass +class PartitionerCliConfig(CliConfig): + @staticmethod + def get_cli_options() -> list[click.Option]: + options = [ + click.Option( + ["--strategy"], + default="auto", + help="The method that will be used to process the documents. " + "Default: auto. Other strategies include `fast` and `hi_res`.", + ), + click.Option( + ["--ocr-languages"], + default=None, + type=DelimitedString(delimiter="+"), + help="A list of language packs to specify which languages to use for OCR, " + "separated by '+' e.g. 'eng+deu' to use the English and German language packs. " + "The appropriate Tesseract " + "language pack needs to be installed.", + ), + click.Option( + ["--encoding"], + default=None, + help="Text encoding to use when reading documents. 
By default the encoding is " + "detected automatically.", + ), + click.Option( + ["--skip-infer-table-types"], + type=DelimitedString(), + default=None, + help="Optional list of document types to skip table extraction on", + ), + click.Option( + ["--additional-partition-args"], + type=Dict(), + help="A json string representation of values to pass through to partition()", + ), + click.Option( + ["--fields-include"], + type=DelimitedString(), + default=["element_id", "text", "type", "metadata", "embeddings"], + help="Comma-delimited list. If set, include the specified top-level " + "fields in an element.", + ), + click.Option( + ["--flatten-metadata"], + is_flag=True, + default=False, + help="Results in flattened json elements. " + "Specifically, the metadata key values are brought to " + "the top-level of the element, and the `metadata` key itself is removed.", + ), + click.Option( + ["--metadata-include"], + default=[], + type=DelimitedString(), + help="Comma-delimited list. If set, include the specified metadata " + "fields if they exist and drop all other fields. ", + ), + click.Option( + ["--metadata-exclude"], + default=[], + type=DelimitedString(), + help="Comma-delimited list. If set, drop the specified metadata " + "fields if they exist.", + ), + click.Option( + ["--partition-by-api"], + is_flag=True, + default=False, + help="Use a remote API to partition the files." + " Otherwise, use the function from partition.auto", + ), + click.Option( + ["--partition-endpoint"], + default="https://api.unstructured.io/general/v0/general", + help="If partitioning via api, use the following host. 
" + "Default: https://api.unstructured.io/general/v0/general", + ), + click.Option( + ["--api-key"], + default=None, + help="API Key for partition endpoint.", + ), + click.Option( + ["--hi-res-model-name"], + default=None, + help="Model name for hi-res strategy.", + ), + ] + return options diff --git a/unstructured/ingest/v2/cli/configs/processor.py b/unstructured/ingest/v2/cli/configs/processor.py new file mode 100644 index 0000000000..04c62fdc44 --- /dev/null +++ b/unstructured/ingest/v2/cli/configs/processor.py @@ -0,0 +1,87 @@ +from dataclasses import dataclass + +import click + +from unstructured.ingest.v2.cli.interfaces import CliConfig +from unstructured.ingest.v2.interfaces.processor import DEFAULT_WORK_DIR + + +@dataclass +class ProcessorCliConfig(CliConfig): + @staticmethod + def get_cli_options() -> list[click.Option]: + options = [ + click.Option( + ["--reprocess"], + is_flag=True, + default=False, + help="Reprocess a downloaded file even if the relevant structured " + "output .json file in output directory already exists.", + ), + click.Option( + ["--work-dir"], + type=str, + default=DEFAULT_WORK_DIR, + show_default=True, + help="Where to place working files when processing each step", + ), + click.Option( + ["--num-processes"], + default=2, + show_default=True, + type=click.IntRange(min=1), + help="Number of parallel processes with which to process docs", + ), + click.Option( + ["--max-connections"], + default=None, + show_default=True, + type=click.IntRange(min=1), + help="Max number of connections allowed when running an async step", + ), + click.Option( + ["--raise-on-error"], + is_flag=True, + default=False, + help="Is set, will raise error if any doc in the pipeline fail. 
Otherwise will " + "log error and continue with other docs", + ), + click.Option( + ["--re-download"], + is_flag=True, + default=False, + help="Re-download files even if they are already present in download dir.", + ), + click.Option( + ["--preserve-downloads"], + is_flag=True, + default=False, + help="Preserve downloaded files. Otherwise each file is removed " + "after being processed successfully.", + ), + click.Option( + ["--download-only"], + is_flag=True, + default=False, + help="Download any files that are not already present in either --download-dir or " + "the default download ~/.cache/... location in case --download-dir " + "is not specified and " + "skip processing them through unstructured.", + ), + click.Option( + ["--max-docs"], + default=None, + type=int, + help="If specified, process at most the specified number of documents.", + ), + click.Option( + ["--uncompress"], + type=bool, + default=False, + is_flag=True, + help="Uncompress any archived files. Currently supporting zip and tar " + "files based on file extension.", + ), + click.Option(["--verbose"], is_flag=True, default=False), + ] + return options diff --git a/unstructured/ingest/v2/cli/interfaces.py b/unstructured/ingest/v2/cli/interfaces.py new file mode 100644 index 0000000000..559590e11d --- /dev/null +++ b/unstructured/ingest/v2/cli/interfaces.py @@ -0,0 +1,28 @@ +from abc import ABC, abstractmethod + +import click + + +class CliConfig(ABC): + @staticmethod + @abstractmethod + def get_cli_options() -> list[click.Option]: + pass + + @classmethod + def add_cli_options(cls, cmd: click.Command) -> None: + options_to_add = cls.get_cli_options() + CliConfig.add_params(cmd, params=options_to_add) + + @staticmethod + def add_params(cmd: click.Command, params: list[click.Parameter]): + existing_opts = [] + for param in cmd.params: + existing_opts.extend(param.opts) + + for param in params: + for opt in param.opts: + if opt in existing_opts: + raise ValueError(f"{opt} is already defined on the 
command {cmd.name}") + existing_opts.append(opt) + cmd.params.append(param) diff --git a/unstructured/ingest/v2/cli/utils.py b/unstructured/ingest/v2/cli/utils.py new file mode 100644 index 0000000000..07a5985e02 --- /dev/null +++ b/unstructured/ingest/v2/cli/utils.py @@ -0,0 +1,167 @@ +import json +import os.path +from dataclasses import fields, is_dataclass +from gettext import gettext, ngettext +from pathlib import Path +from typing import Any, Optional, Type, TypeVar, Union, get_args, get_origin + +import click + +from unstructured.ingest.enhanced_dataclass import EnhancedDataClassJsonMixin +from unstructured.ingest.v2.logger import logger + + +class Dict(click.ParamType): + name = "dict" + + def convert( + self, + value: Any, + param: Optional[click.Parameter] = None, + ctx: Optional[click.Context] = None, + ) -> Any: + try: + return json.loads(value) + except json.JSONDecodeError: + self.fail( + gettext( + "{value} is not a valid json value.", + ).format(value=value), + param, + ctx, + ) + + +class FileOrJson(click.ParamType): + name = "file-or-json" + + def __init__(self, allow_raw_str: bool = False): + self.allow_raw_str = allow_raw_str + + def convert( + self, + value: Any, + param: Optional[click.Parameter] = None, + ctx: Optional[click.Context] = None, + ) -> Any: + # check if valid file + full_path = os.path.abspath(os.path.expanduser(value)) + if os.path.isfile(full_path): + return str(Path(full_path).resolve()) + if isinstance(value, str): + try: + return json.loads(value) + except json.JSONDecodeError: + if self.allow_raw_str: + return value + self.fail( + gettext( + "{value} is not a valid json string nor an existing filepath.", + ).format(value=value), + param, + ctx, + ) + + +class DelimitedString(click.ParamType): + name = "delimited-string" + + def __init__(self, delimiter: str = ",", choices: Optional[list[str]] = None): + self.choices = choices if choices else [] + self.delimiter = delimiter + + def convert( + self, + value: Any, + param: 
Optional[click.Parameter] = None, + ctx: Optional[click.Context] = None, + ) -> Any: + # In case a list is provided as the default, will not break + if isinstance(value, list): + split = [str(v).strip() for v in value] + else: + split = [v.strip() for v in value.split(self.delimiter)] + if not self.choices: + return split + choices_str = ", ".join(map(repr, self.choices)) + for s in split: + if s not in self.choices: + self.fail( + ngettext( + "{value!r} is not {choice}.", + "{value!r} is not one of {choices}.", + len(self.choices), + ).format(value=s, choice=choices_str, choices=choices_str), + param, + ctx, + ) + return split + + +EnhancedDataClassJsonMixinT = TypeVar( + "EnhancedDataClassJsonMixinT", bound=EnhancedDataClassJsonMixin +) + + +def extract_config( + flat_data: dict, config: Type[EnhancedDataClassJsonMixinT] +) -> EnhancedDataClassJsonMixinT: + """ + To be able to extract a nested dataclass from a flat dictionary (as in one coming + from a click-based options input), the config class is dynamically looked through for + nested dataclass fields and new nested dictionaries are created to conform to the + shape the overall class expects when parsing from a dict. During the process, this will create + copies of the original dictionary to avoid pruning fields but this isn't a + problem since the `from_dict()` method ignores unneeded values. + + Not handling more complex edge cases for now such as nested types i.e Union[List[List[...]]] + """ + + def conform_dict(inner_d: dict, inner_config: Type[EnhancedDataClassJsonMixinT]): + # Catch edge cases (i.e. 
Dict[str, ...]) where underlying type is not a concrete Class, + # causing 'issubclass() arg 1 must be a class' errors, return False + def is_subclass(instance, class_type) -> bool: + try: + return issubclass(instance, class_type) + except Exception: + return False + + dd = inner_d.copy() + for field in fields(inner_config): + f_type = field.type + # Handle the case where the type of a value if a Union (possibly optional) + if get_origin(f_type) is Union: + union_values = get_args(f_type) + # handle List types + union_values = [ + get_args(u)[0] if get_origin(u) is list else u for u in union_values + ] + # Ignore injected NoneType when optional + concrete_union_values = [v for v in union_values if not is_subclass(v, type(None))] + dataclass_union_values = [v for v in concrete_union_values if is_dataclass(v)] + non_dataclass_union_values = [ + v for v in concrete_union_values if not is_dataclass(v) + ] + if not dataclass_union_values: + continue + # Check if the key for this field already exists in the dictionary, + # if so it might map to one of these non dataclass fields and this + # can't be enforced + if non_dataclass_union_values and field.name in dd: + continue + if len(dataclass_union_values) > 1: + logger.warning( + "more than one dataclass type possible for field {}, " + "not extracting: {}".format(field.name, ", ".join(dataclass_union_values)) + ) + continue + f_type = dataclass_union_values[0] + origin = get_origin(f_type) + if origin: + f_type = origin + if is_subclass(f_type, EnhancedDataClassJsonMixin): + dd[field.name] = conform_dict(inner_d=dd, inner_config=f_type) + return dd + + adjusted_dict = conform_dict(inner_d=flat_data, inner_config=config) + return config.from_dict(adjusted_dict, apply_name_overload=False) diff --git a/unstructured/ingest/v2/example.py b/unstructured/ingest/v2/example.py new file mode 100644 index 0000000000..2690a4a48b --- /dev/null +++ b/unstructured/ingest/v2/example.py @@ -0,0 +1,35 @@ +from pathlib import Path + +from 
unstructured.ingest.v2.interfaces import ProcessorConfig +from unstructured.ingest.v2.logger import logger +from unstructured.ingest.v2.pipeline.pipeline import Pipeline +from unstructured.ingest.v2.processes.chunker import ChunkerConfig +from unstructured.ingest.v2.processes.connectors.fsspec.s3 import ( + S3ConnectionConfig, + S3DownloaderConfig, + S3IndexerConfig, +) +from unstructured.ingest.v2.processes.connectors.local import ( + LocalUploaderConfig, +) +from unstructured.ingest.v2.processes.embedder import EmbedderConfig +from unstructured.ingest.v2.processes.partitioner import PartitionerConfig + +base_path = Path(__file__).parent.parent.parent.parent +docs_path = base_path / "example-docs" +work_dir = base_path / "tmp_ingest" +output_path = work_dir / "output" +download_path = work_dir / "download" + +if __name__ == "__main__": + logger.info(f"Writing all content in: {work_dir.resolve()}") + Pipeline.from_configs( + context=ProcessorConfig(work_dir=str(work_dir.resolve())), + indexer_config=S3IndexerConfig(remote_url="s3://utic-dev-tech-fixtures/small-pdf-set/"), + downloader_config=S3DownloaderConfig(download_dir=download_path), + source_connection_config=S3ConnectionConfig(anonymous=True), + partitioner_config=PartitionerConfig(strategy="fast"), + chunker_config=ChunkerConfig(chunking_strategy="by_title"), + embedder_config=EmbedderConfig(embedding_provider="langchain-huggingface"), + uploader_config=LocalUploaderConfig(output_dir=str(output_path.resolve())), + ).run() diff --git a/unstructured/ingest/v2/interfaces/__init__.py b/unstructured/ingest/v2/interfaces/__init__.py new file mode 100644 index 0000000000..08be9bdd2a --- /dev/null +++ b/unstructured/ingest/v2/interfaces/__init__.py @@ -0,0 +1,27 @@ +from .connector import AccessConfig, BaseConnector, ConnectionConfig +from .downloader import Downloader, DownloaderConfig +from .file_data import FileData, SourceIdentifiers +from .indexer import Indexer, IndexerConfig +from .process import BaseProcess 
+from .processor import ProcessorConfig +from .upload_stager import UploadStager, UploadStagerConfig +from .uploader import UploadContent, Uploader, UploaderConfig + +__all__ = [ + "Downloader", + "DownloaderConfig", + "FileData", + "Indexer", + "IndexerConfig", + "BaseProcess", + "ProcessorConfig", + "UploadStager", + "UploadStagerConfig", + "Uploader", + "UploaderConfig", + "SourceIdentifiers", + "UploadContent", + "AccessConfig", + "ConnectionConfig", + "BaseConnector", +] diff --git a/unstructured/ingest/v2/interfaces/connector.py b/unstructured/ingest/v2/interfaces/connector.py new file mode 100644 index 0000000000..f71f0ca2a2 --- /dev/null +++ b/unstructured/ingest/v2/interfaces/connector.py @@ -0,0 +1,32 @@ +from abc import ABC +from dataclasses import dataclass +from typing import Any, Optional, TypeVar + +from unstructured.ingest.enhanced_dataclass import EnhancedDataClassJsonMixin, enhanced_field + + +@dataclass +class AccessConfig(EnhancedDataClassJsonMixin): + """Meant to designate holding any sensitive information associated with other configs + and also for access specific configs.""" + + +AccessConfigT = TypeVar("AccessConfigT", bound=AccessConfig) + + +@dataclass +class ConnectionConfig(EnhancedDataClassJsonMixin): + access_config: Optional[AccessConfigT] = enhanced_field(sensitive=True, default=None) + + def get_access_config(self) -> dict[str, Any]: + if not self.access_config: + return {} + return self.access_config.to_dict(apply_name_overload=False) + + +ConnectionConfigT = TypeVar("ConnectionConfigT", bound=ConnectionConfig) + + +@dataclass +class BaseConnector(ABC): + connection_config: Optional[ConnectionConfigT] = None diff --git a/unstructured/ingest/v2/interfaces/downloader.py b/unstructured/ingest/v2/interfaces/downloader.py new file mode 100644 index 0000000000..aee4bc47e3 --- /dev/null +++ b/unstructured/ingest/v2/interfaces/downloader.py @@ -0,0 +1,49 @@ +from abc import ABC, abstractmethod +from dataclasses import dataclass, field 
+from pathlib import Path +from typing import Any, Optional, TypeVar + +from unstructured.ingest.enhanced_dataclass import EnhancedDataClassJsonMixin +from unstructured.ingest.v2.interfaces.connector import BaseConnector +from unstructured.ingest.v2.interfaces.file_data import FileData +from unstructured.ingest.v2.interfaces.process import BaseProcess + + +@dataclass +class DownloaderConfig(EnhancedDataClassJsonMixin): + download_dir: Optional[Path] = None + + +DownloaderConfigT = TypeVar("DownloaderConfigT", bound=DownloaderConfig) + + +class Downloader(BaseProcess, BaseConnector, ABC): + connector_type: str + download_config: Optional[DownloaderConfigT] = field(default_factory=DownloaderConfig) + + @property + def download_dir(self) -> Path: + if self.download_config.download_dir is None: + self.download_config.download_dir = ( + Path.home() + / ".cache" + / "unstructured" + / "ingest" + / "download" + / self.connector_type + ).resolve() + return self.download_config.download_dir + + def is_async(self) -> bool: + return True + + @abstractmethod + def get_download_path(self, file_data: FileData) -> Path: + pass + + @abstractmethod + def run(self, file_data: FileData, **kwargs: Any) -> Path: + pass + + async def run_async(self, file_data: FileData, **kwargs: Any) -> Path: + return self.run(file_data=file_data, **kwargs) diff --git a/unstructured/ingest/v2/interfaces/file_data.py b/unstructured/ingest/v2/interfaces/file_data.py new file mode 100644 index 0000000000..9e2a14fc23 --- /dev/null +++ b/unstructured/ingest/v2/interfaces/file_data.py @@ -0,0 +1,56 @@ +import json +from dataclasses import dataclass, field +from enum import Enum +from pathlib import Path +from typing import Any, Optional + +from dataclasses_json import DataClassJsonMixin + +from unstructured.documents.elements import DataSourceMetadata + + +class IndexDocType(str, Enum): + BATCH = "batch" + FILE = "file" + + +@dataclass +class SourceIdentifiers: + filename: str + fullpath: str + rel_path: 
Optional[str] = None + additional_metadata: dict[str, Any] = field(default_factory=dict) + + @property + def filename_stem(self) -> str: + return Path(self.filename).stem + + @property + def relative_path(self) -> str: + return self.rel_path or self.fullpath + + +@dataclass +class FileData(DataClassJsonMixin): + identifier: str + connector_type: str + source_identifiers: SourceIdentifiers + doc_type: IndexDocType = field(default=IndexDocType.FILE) + metadata: DataSourceMetadata = field(default_factory=DataSourceMetadata) + reprocess: bool = False + + @classmethod + def from_file(cls, path: str) -> "FileData": + path = Path(path).resolve() + if not path.exists() or not path.is_file(): + raise ValueError(f"file path not valid: {path}") + with open(str(path.resolve()), "rb") as f: + file_data_dict = json.load(f) + file_data = FileData.from_dict(file_data_dict) + return file_data + + def to_file(self, path: str) -> None: + path = Path(path).resolve() + path.parent.mkdir(parents=True, exist_ok=True) + with open(str(path.resolve()), "w") as f: + json.dump(self.to_dict(), f, indent=2) diff --git a/unstructured/ingest/v2/interfaces/indexer.py b/unstructured/ingest/v2/interfaces/indexer.py new file mode 100644 index 0000000000..f3f2490ef0 --- /dev/null +++ b/unstructured/ingest/v2/interfaces/indexer.py @@ -0,0 +1,28 @@ +from abc import ABC, abstractmethod +from dataclasses import dataclass +from typing import Any, Generator, Optional, TypeVar + +from unstructured.ingest.enhanced_dataclass import EnhancedDataClassJsonMixin +from unstructured.ingest.v2.interfaces.connector import BaseConnector +from unstructured.ingest.v2.interfaces.file_data import FileData +from unstructured.ingest.v2.interfaces.process import BaseProcess + + +@dataclass +class IndexerConfig(EnhancedDataClassJsonMixin): + pass + + +IndexerConfigT = TypeVar("IndexerConfigT", bound=IndexerConfig) + + +class Indexer(BaseProcess, BaseConnector, ABC): + connector_type: str + index_config: 
@dataclass
class BaseProcess(ABC):
    """Common interface for every unit of work the pipeline can execute."""

    def is_async(self) -> bool:
        """Return whether callers should use ``run_async``; False by default."""
        return False

    @abstractmethod
    def run(self, **kwargs: Any) -> Any:
        """Execute the process synchronously; subclasses must implement this."""

    async def run_async(self, **kwargs: Any) -> Any:
        """Async entry point; by default it delegates to the blocking ``run``."""
        outcome = self.run(**kwargs)
        return outcome

    def check_connection(self):
        """Hook for processes that need external connections; no-op by default."""
        return None
@dataclass
class UploadStager(BaseProcess, ABC):
    """Abstract process that takes an elements file and returns a staged file path."""

    upload_stager_config: Optional[UploadStagerConfigT] = None

    @abstractmethod
    def run(self, elements_filepath: Path, file_data: FileData, **kwargs: Any) -> Path:
        """Stage the content at ``elements_filepath`` and return the resulting path."""

    async def run_async(self, elements_filepath: Path, file_data: FileData, **kwargs: Any) -> Path:
        """Async entry point; by default it delegates to the blocking ``run``."""
        staged_path = self.run(
            elements_filepath=elements_filepath,
            file_data=file_data,
            **kwargs,
        )
        return staged_path
def default_is_data_sensitive(k: str, v: Any) -> bool:
    """Decide whether the value stored under key ``k`` should be redacted.

    A key listed in ``sensitive_fields`` is always sensitive. Any other key is
    sensitive only when its value is truthy and the lower-cased key contains
    one of the trigger substrings.
    """
    sensitive_fields = [
        "account_name",
        "client_id",
    ]
    sensitive_triggers = ["key", "cred", "token", "password", "oauth", "secret"]
    key = k.lower()
    if key in sensitive_fields:
        return True
    return bool(v) and any(trigger in key for trigger in sensitive_triggers)


def hide_sensitive_fields(
    data: dict, is_sensitive_fn: Callable[[str, Any], bool] = default_is_data_sensitive
) -> dict:
    """Return a copy of ``data`` with every sensitive value replaced by a mask.

    Nested dicts are processed recursively. A string value that parses as a
    JSON object is redacted as a dict and written back via ``json.dumps``.

    Args:
        data: Mapping to redact; it is not modified.
        is_sensitive_fn: Predicate called with ``(key, value)``. It is also
            used for every nested dict and embedded JSON object.

    Returns:
        A new dict in which sensitive values are ``"*******"``.
    """
    new_data = data.copy()
    for k, v in data.items():
        if is_sensitive_fn(k, v):
            new_data[k] = "*******"
            # Stop here: descending into a sensitive dict or JSON string would
            # overwrite the mask with a partially redacted copy of the secret.
            continue
        if isinstance(v, dict):
            new_data[k] = hide_sensitive_fields(v, is_sensitive_fn)
        elif isinstance(v, str):
            # Take into account strings generated via json.dumps()
            try:
                json_data = json.loads(v)
            except json.JSONDecodeError:
                continue
            if isinstance(json_data, dict):
                new_data[k] = json.dumps(hide_sensitive_fields(json_data, is_sensitive_fn))
    return new_data


def _balanced_brace_spans(s: str) -> list:
    """Return every top-level ``{...}`` substring of ``s`` with balanced braces."""
    spans = []
    i = 0
    while i < len(s):
        if s[i] != "{":
            i += 1
            continue
        depth = 0
        end = None
        for j in range(i, len(s)):
            if s[j] == "{":
                depth += 1
            elif s[j] == "}":
                depth -= 1
                if depth == 0:
                    end = j
                    break
        if end is None:
            # This '{' is never closed; skip it and keep looking, since a
            # later '{' may still open a balanced span.
            i += 1
            continue
        spans.append(s[i : end + 1])
        i = end + 1
    return spans


def redact_jsons(s: str) -> str:
    """Redact sensitive values inside every JSON/dict literal embedded in ``s``.

    Each balanced ``{...}`` span is parsed as JSON, falling back to a Python
    literal. It is then passed through ``hide_sensitive_fields`` and replaced
    in ``s`` by the ``json.dumps`` form of the redacted dict. Spans that cannot
    be parsed or serialized (e.g. ``{name}``) and unclosed ``{`` characters are
    left untouched, so formatting a log record never fails here.
    """
    for candidate in _balanced_brace_spans(s):
        try:
            try:
                formatted = json.dumps(json.loads(candidate))
            except json.JSONDecodeError:
                formatted = json.dumps(ast.literal_eval(candidate))
            hidden = json.dumps(hide_sensitive_fields(json.loads(formatted)))
        except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
            # Not a dict-like literal after all; leave the text as it is.
            continue
        s = s.replace(candidate, hidden)
    return s
@dataclass
class PipelineStep(ABC):
    """Wrap a process and run it over a batch of keyword-argument dicts.

    Calling the step picks a strategy: serial when parallelism is disabled,
    asyncio when the wrapped process reports ``is_async()``, otherwise a
    multiprocessing pool. Subclasses supply ``_run`` / ``_run_async``.
    """

    process: BaseProcessT
    context: ProcessorConfig
    identifier: str

    def __str__(self):
        return self.identifier

    def process_serially(self, iterable: iterable_input) -> Any:
        """Run the step once per input, in order, in the current process."""
        logger.info("processing content serially")
        if not iterable:
            return [self.run()]
        return [self.run(**call_kwargs) for call_kwargs in iterable]

    async def _process_async(self, iterable: iterable_input) -> Any:
        """Await the step for every input, gathering when there are several."""
        if not iterable:
            return [await self.run_async()]
        if len(iterable) == 1:
            return [await self.run_async(**iterable[0])]
        pending = [self.run_async(**call_kwargs) for call_kwargs in iterable]
        return await asyncio.gather(*pending)

    def process_async(self, iterable: iterable_input) -> Any:
        """Drive ``_process_async`` to completion with ``asyncio.run``."""
        logger.info("processing content async")
        return asyncio.run(self._process_async(iterable=iterable))

    def process_multiprocess(self, iterable: iterable_input) -> Any:
        """Run the step over the inputs using a multiprocessing pool.

        A pool is only created for more than one input and more than one
        configured process; the simpler cases run in the current process.
        """
        logger.info("processing content across processes")

        if not iterable:
            return [self.run()]
        if len(iterable) == 1:
            return [self.run(**iterable[0])]
        worker_count = self.context.num_processes
        if worker_count == 1:
            return self.process_serially(iterable)
        child_log_level = logging.DEBUG if self.context.verbose else logging.INFO
        with mp.Pool(
            processes=worker_count,
            initializer=self._set_log_level,
            initargs=(child_log_level,),
        ) as pool:
            return pool.map(self._wrap_mp, iterable)

    def _wrap_mp(self, input_kwargs: dict) -> Any:
        # pool.map() passes a single positional argument, so unpack it here
        return self.run(**input_kwargs)

    def _set_log_level(self, log_level: int):
        # Pool initializer: set the log level inside each spawned process
        logger.setLevel(log_level)

    def __call__(self, iterable: Optional[iterable_input] = None) -> Any:
        """Run the step over ``iterable`` using the strategy the context selects."""
        docs = iterable or []
        if docs:
            logger.info(
                f"Calling {self.__class__.__name__} with {len(docs)} docs",  # type: ignore
            )
        if self.context.disable_parallelism:
            return self.process_serially(iterable=docs)
        if self.process.is_async():
            return self.process_async(iterable=docs)
        return self.process_multiprocess(iterable=docs)

    def _run(self, *args, **kwargs: Any) -> Optional[Any]:
        raise NotImplementedError

    async def _run_async(self, *args, **kwargs: Any) -> Optional[Any]:
        raise NotImplementedError

    def _record_failure(self, error: Exception, call_kwargs: dict) -> None:
        # Log the error and, when the call names a file, store it in the shared status
        logger.error(f"Exception raised while running {self.identifier}", exc_info=error)
        if "file_data_path" in call_kwargs:
            self.context.status[call_kwargs["file_data_path"]] = {self.identifier: str(error)}

    def run(self, *args, **kwargs: Any) -> Optional[Any]:
        """Call ``_run``; on failure record it and return None unless raising is enabled."""
        try:
            return self._run(*args, **kwargs)
        except Exception as e:
            self._record_failure(e, kwargs)
            if self.context.raise_on_error:
                raise e
            return None

    async def run_async(self, *args, **kwargs: Any) -> Optional[Any]:
        """Async counterpart of ``run`` with the same error handling."""
        try:
            return await self._run_async(*args, **kwargs)
        except Exception as e:
            self._record_failure(e, kwargs)
            if self.context.raise_on_error:
                raise e
            return None

    @property
    def cache_dir(self) -> Path:
        """Directory under the context's work dir named after this step."""
        work_dir = Path(self.context.work_dir)
        return work_dir / self.identifier
@dataclass
class Pipeline:
    """Chain the ingest steps: index, download, partition, optional modifiers, upload.

    The process objects are passed as init-only arguments and wrapped into
    their step objects in ``__post_init__``. When no uploader is given, a
    ``LocalUploader`` is created for this pipeline.
    """

    context: ProcessorConfig
    indexer: InitVar[IndexerT]
    indexer_step: IndexStep = field(init=False)
    downloader: InitVar[DownloaderT]
    downloader_step: DownloadStep = field(init=False)
    partitioner: InitVar[Partitioner]
    partitioner_step: PartitionStep = field(init=False)
    chunker: InitVar[Optional[Chunker]] = None
    chunker_step: ChunkStep = field(init=False, default=None)
    embedder: InitVar[Optional[Embedder]] = None
    embedder_step: EmbedStep = field(init=False, default=None)
    stager: InitVar[Optional[UploadStager]] = None
    stager_step: UploadStageStep = field(init=False, default=None)
    # Default to None rather than a LocalUploader() instance: an instance in
    # the class body is built at import time and shared by every pipeline.
    uploader: InitVar[Optional[Uploader]] = None
    uploader_step: UploadStep = field(init=False, default=None)
    uncompress_step: UncompressStep = field(init=False, default=None)

    def __post_init__(
        self,
        indexer: IndexerT,
        downloader: DownloaderT,
        partitioner: Partitioner,
        chunker: Optional[Chunker] = None,
        embedder: Optional[Embedder] = None,
        stager: Optional[UploadStager] = None,
        uploader: Optional[Uploader] = None,
    ):
        """Wrap each provided process in its pipeline step."""
        logger.setLevel(level=logging.DEBUG if self.context.verbose else logging.INFO)
        if uploader is None:
            uploader = LocalUploader()
        self.indexer_step = IndexStep(process=indexer, context=self.context)
        self.downloader_step = DownloadStep(process=downloader, context=self.context)
        self.partitioner_step = PartitionStep(process=partitioner, context=self.context)
        self.chunker_step = ChunkStep(process=chunker, context=self.context) if chunker else None
        self.embedder_step = EmbedStep(process=embedder, context=self.context) if embedder else None
        self.stager_step = UploadStageStep(process=stager, context=self.context) if stager else None
        self.uploader_step = UploadStep(process=uploader, context=self.context)
        if self.context.uncompress:
            process = Uncompressor()
            self.uncompress_step = UncompressStep(process=process, context=self.context)

    def cleanup(self):
        """Hook run after every pipeline run; currently does nothing."""
        pass

    def log_statuses(self):
        """Log one error line per failure recorded in ``context.status``."""
        if status := self.context.status:
            logger.error(f"{len(status)} failed documents:")
            for k, v in status.items():
                for kk, vv in v.items():
                    logger.error(f"{k}: [{kk}] {vv}")

    def run(self):
        """Run the pipeline, then always log failures and clean up."""
        try:
            self._run()
        finally:
            self.log_statuses()
            self.cleanup()

    def clean_results(self, results: Optional[list[Union[Any, list[Any]]]]) -> Optional[list[Any]]:
        """Flatten one level of nesting and drop falsy entries.

        Returns:
            The flattened list, or None when nothing truthy remains.
        """
        if not results:
            return None
        results = [r for r in results if r]
        flat = []
        for r in results:
            if isinstance(r, list):
                flat.extend(r)
            else:
                flat.append(r)
        final = [f for f in flat if f]
        return final or None

    def _run(self):
        """Run each step in order, stopping early once a step yields nothing."""
        logger.info(
            f"Running local pipeline: {self} with configs: "
            f"{sterilize_dict(self.context.to_dict(redact_sensitive=True))}"
        )
        # A manager-backed dict so failures recorded in worker processes are
        # visible to this process when statuses are logged.
        manager = mp.Manager()
        self.context.status = manager.dict()

        # Index into data source
        indices = self.indexer_step.run()
        indices_inputs = [{"file_data_path": i} for i in indices]
        if not indices_inputs:
            return

        # Download associated content to local file system
        downloaded_data = self.downloader_step(indices_inputs)
        downloaded_data = self.clean_results(results=downloaded_data)
        if not downloaded_data:
            return

        # Run uncompress if available
        if self.uncompress_step:
            downloaded_data = self.uncompress_step(downloaded_data)
            # Flatten list of lists
            downloaded_data = self.clean_results(results=downloaded_data)

        if not downloaded_data:
            return

        # Partition content
        elements = self.partitioner_step(downloaded_data)
        elements = self.clean_results(results=elements)
        if not elements:
            return

        # Run element specific modifiers
        for step in [self.chunker_step, self.embedder_step, self.stager_step]:
            elements = step(elements) if step else elements
            elements = self.clean_results(results=elements)
            if not elements:
                return

        # Upload the final result
        self.uploader_step(iterable=elements)

    def __str__(self):
        """Render the configured steps in execution order, joined by ' -> '."""
        s = [str(self.indexer_step), str(self.downloader_step)]
        if uncompress_step := self.uncompress_step:
            s.append(str(uncompress_step))
        s.append(str(self.partitioner_step))
        if chunker_step := self.chunker_step:
            s.append(str(chunker_step))
        if embedder_step := self.embedder_step:
            s.append(str(embedder_step))
        if stager_step := self.stager_step:
            s.append(str(stager_step))
        s.append(str(self.uploader_step))
        return " -> ".join(s)

    @classmethod
    def from_configs(
        cls,
        context: ProcessorConfig,
        indexer_config: IndexerConfigT,
        downloader_config: DownloaderConfigT,
        source_connection_config: ConnectionConfigT,
        partitioner_config: PartitionerConfig,
        chunker_config: Optional[ChunkerConfig] = None,
        embedder_config: Optional[EmbedderConfig] = None,
        destination_connection_config: Optional[ConnectionConfigT] = None,
        stager_config: Optional[UploadStagerConfigT] = None,
        uploader_config: Optional[UploaderConfigT] = None,
    ) -> "Pipeline":
        """Build a pipeline by matching config instances against the registries.

        The source entry is the one whose indexer, downloader and connection
        config types all match. The destination entry is looked up only when
        ``uploader_config`` is given, narrowed by the connection and stager
        configs when those are provided.

        Raises:
            ValueError: If zero or several registry entries match.
        """
        # Get registry key based on indexer config
        source_entry = {
            k: v
            for k, v in source_registry.items()
            if isinstance(indexer_config, v.indexer_config)
            and isinstance(downloader_config, v.downloader_config)
            and isinstance(source_connection_config, v.connection_config)
        }
        if len(source_entry) > 1:
            raise ValueError(
                f"multiple entries found matching provided indexer, "
                f"downloader and connection configs: {source_entry}"
            )
        if len(source_entry) != 1:
            raise ValueError(
                "no entry found in source registry with matching indexer, "
                "downloader and connection configs"
            )
        source = list(source_entry.values())[0]
        pipeline_kwargs = {
            "context": context,
            "indexer": source.indexer(
                index_config=indexer_config, connection_config=source_connection_config
            ),
            "downloader": source.downloader(
                download_config=downloader_config, connection_config=source_connection_config
            ),
            "partitioner": Partitioner(config=partitioner_config),
        }
        if chunker_config:
            pipeline_kwargs["chunker"] = Chunker(config=chunker_config)
        if embedder_config:
            pipeline_kwargs["embedder"] = Embedder(config=embedder_config)
        if not uploader_config:
            # No destination requested: the pipeline falls back to its default uploader.
            return cls(**pipeline_kwargs)

        destination_entry = {
            k: v
            for k, v in destination_registry.items()
            if isinstance(uploader_config, v.uploader_config)
        }
        if destination_connection_config:
            destination_entry = {
                k: v
                for k, v in destination_entry.items()
                if isinstance(destination_connection_config, v.connection_config)
            }
        if stager_config:
            destination_entry = {
                k: v
                for k, v in destination_entry.items()
                if isinstance(stager_config, v.upload_stager_config)
            }

        if len(destination_entry) > 1:
            raise ValueError(
                f"multiple entries found matching provided uploader, "
                f"stager and connection configs: {destination_entry}"
            )
        if len(destination_entry) != 1:
            raise ValueError(
                "no entry found in destination registry with matching uploader, "
                "stager and connection configs"
            )

        destination = list(destination_entry.values())[0]
        if stager_config:
            pipeline_kwargs["stager"] = destination.upload_stager(
                upload_stager_config=stager_config
            )
        uploader_kwargs = {"upload_config": uploader_config}
        if destination_connection_config:
            uploader_kwargs["connection_config"] = destination_connection_config
        pipeline_kwargs["uploader"] = destination.uploader(**uploader_kwargs)
        return cls(**pipeline_kwargs)
FileData) -> bool: + if self.context.reprocess or file_data.reprocess: + return True + if not filepath.exists(): + return True + return False + + def get_output_filepath(self, filename: Path) -> Path: + hashed_output_file = f"{self.get_hash(extras=[filename.stem])}.json" + filepath = (self.cache_dir / hashed_output_file).resolve() + filepath.parent.mkdir(parents=True, exist_ok=True) + return filepath + + def _save_output(self, output_filepath: str, chunked_content: list[dict]): + with open(str(output_filepath), "w") as f: + logger.debug(f"Writing chunker output to: {output_filepath}") + json.dump(chunked_content, f, indent=2) + + def _run(self, path: str, file_data_path: str) -> ChunkStepResponse: + path = Path(path) + file_data = FileData.from_file(path=file_data_path) + output_filepath = self.get_output_filepath(filename=path) + if not self.should_chunk(filepath=output_filepath, file_data=file_data): + logger.debug(f"Skipping chunking, output already exists: {output_filepath}") + return ChunkStepResponse(file_data_path=file_data_path, path=str(output_filepath)) + chunked_content_raw = self.process.run(elements_filepath=path) + self._save_output( + output_filepath=str(output_filepath), + chunked_content=elements_to_dicts(chunked_content_raw), + ) + return ChunkStepResponse(file_data_path=file_data_path, path=str(output_filepath)) + + async def _run_async(self, path: str, file_data_path: str) -> ChunkStepResponse: + path = Path(path) + file_data = FileData.from_file(path=file_data_path) + output_filepath = self.get_output_filepath(filename=path) + if not self.should_chunk(filepath=output_filepath, file_data=file_data): + logger.debug(f"Skipping chunking, output already exists: {output_filepath}") + return ChunkStepResponse(file_data_path=file_data_path, path=str(output_filepath)) + if semaphore := self.context.semaphore: + async with semaphore: + chunked_content_raw = await self.process.run_async(elements_filepath=path) + else: + chunked_content_raw = await 
@dataclass
class DownloadStep(PipelineStep):
    """Pipeline step that downloads each indexed file unless a local copy can be reused."""

    process: DownloaderT
    identifier: str = STEP_ID

    def __str__(self):
        return f"{self.identifier} ({self.process.__class__.__name__})"

    def __post_init__(self):
        """Log the redacted downloader and connection configs."""
        download_config = self.process.download_config
        config = (
            sterilize_dict(download_config.to_dict(redact_sensitive=True))
            if download_config
            else None
        )
        raw_connection_config = self.process.connection_config
        connection_config = (
            sterilize_dict(raw_connection_config.to_dict(redact_sensitive=True))
            if raw_connection_config
            else None
        )
        logger.info(
            f"Created {self.identifier} with configs: {config}, "
            f"connection configs: {connection_config}"
        )

    @staticmethod
    def is_float(value: str):
        """Return True when ``float(value)`` succeeds, False on ValueError."""
        try:
            float(value)
        except ValueError:
            return False
        return True

    def should_download(self, file_data: FileData, file_data_path: str) -> bool:
        """Decide whether the file behind ``file_data`` has to be downloaded.

        Returns True when re-download is forced, when no local copy exists, or
        when the modification-time check below fires. In that last case
        ``file_data`` is also marked for reprocessing and written back to
        ``file_data_path``.
        """
        if self.context.re_download:
            return True
        local_path = self.process.get_download_path(file_data=file_data)
        if not local_path.exists():
            return True
        if not local_path.is_file():
            return False
        modified = file_data.metadata.date_modified
        if not modified or not self.is_float(modified):
            return False
        # NOTE(review): this fires when the local copy's mtime is newer than
        # the recorded date_modified - confirm that direction is intended.
        local_is_newer = local_path.stat().st_mtime > float(modified)
        if not local_is_newer:
            return False
        # Also update file data to mark this to reprocess since this won't change the filename
        file_data.reprocess = True
        file_data.to_file(path=file_data_path)
        return True

    def _run(self, file_data_path: str) -> list[DownloadStepResponse]:
        """Download one file synchronously, or reuse the existing local copy."""
        file_data = FileData.from_file(path=file_data_path)
        download_path = self.process.get_download_path(file_data=file_data)
        if self.should_download(file_data=file_data, file_data_path=file_data_path):
            download_path = self.process.run(file_data=file_data)
        else:
            logger.debug(f"Skipping download, file already exists locally: {download_path}")
        return [DownloadStepResponse(file_data_path=file_data_path, path=str(download_path))]

    async def _run_async(self, file_data_path: str) -> list[DownloadStepResponse]:
        """Async variant of ``_run``; honours the context semaphore when one is set."""
        file_data = FileData.from_file(path=file_data_path)
        download_path = self.process.get_download_path(file_data=file_data)
        if not self.should_download(file_data=file_data, file_data_path=file_data_path):
            logger.debug(f"Skipping download, file already exists locally: {download_path}")
            return [DownloadStepResponse(file_data_path=file_data_path, path=str(download_path))]
        semaphore = self.context.semaphore
        if semaphore:
            async with semaphore:
                download_path = await self.process.run_async(file_data=file_data)
        else:
            download_path = await self.process.run_async(file_data=file_data)
        return [DownloadStepResponse(file_data_path=file_data_path, path=str(download_path))]

    def get_hash(self, extras: Optional[list[str]]) -> str:
        """Return a 12-character SHA-256 prefix of the download config plus ``extras``."""
        parts = [json.dumps(self.process.download_config.to_dict(), sort_keys=True)]
        if extras:
            parts.extend(extras)
        digest = hashlib.sha256("".join(parts).encode())
        return digest.hexdigest()[:12]
(self.cache_dir / hashed_output_file).resolve() + filepath.parent.mkdir(parents=True, exist_ok=True) + return filepath + + def _save_output(self, output_filepath: str, embedded_content: list[dict]): + with open(str(output_filepath), "w") as f: + logger.debug(f"Writing embedded output to: {output_filepath}") + json.dump(embedded_content, f, indent=2) + + def _run(self, path: str, file_data_path: str) -> EmbedStepResponse: + path = Path(path) + file_data = FileData.from_file(path=file_data_path) + + output_filepath = self.get_output_filepath(filename=path) + if not self.should_embed(filepath=output_filepath, file_data=file_data): + logger.debug(f"Skipping embedding, output already exists: {output_filepath}") + return EmbedStepResponse(file_data_path=file_data_path, path=str(output_filepath)) + embed_content_raw = self.process.run(elements_filepath=path) + self._save_output( + output_filepath=str(output_filepath), + embedded_content=elements_to_dicts(embed_content_raw), + ) + return EmbedStepResponse(file_data_path=file_data_path, path=str(output_filepath)) + + async def _run_async(self, path: str, file_data_path: str) -> EmbedStepResponse: + path = Path(path) + file_data = FileData.from_file(path=file_data_path) + output_filepath = self.get_output_filepath(filename=path) + if not self.should_embed(filepath=output_filepath, file_data=file_data): + logger.debug(f"Skipping embedding, output already exists: {output_filepath}") + return EmbedStepResponse(file_data_path=file_data_path, path=str(output_filepath)) + if semaphore := self.context.semaphore: + async with semaphore: + embed_content_raw = await self.process.run_async(elements_filepath=path) + else: + embed_content_raw = await self.process.run_async(elements_filepath=path) + + self._save_output( + output_filepath=str(output_filepath), + embedded_content=elements_to_dicts(embed_content_raw), + ) + return EmbedStepResponse(file_data_path=file_data_path, path=str(output_filepath)) + + def get_hash(self, extras: 
def __post_init__(self):
    """Log the index and connection configs this step was created with.

    Both configs are serialized with ``redact_sensitive=True`` and passed
    through ``sterilize_dict`` before logging; a missing config is logged
    as ``None``.
    """
    config = None
    if self.process.index_config:
        config = sterilize_dict(self.process.index_config.to_dict(redact_sensitive=True))

    connection_config = None
    if self.process.connection_config:
        connection_config = sterilize_dict(
            self.process.connection_config.to_dict(redact_sensitive=True)
        )

    logger.info(
        f"Created {self.identifier} with configs: {config}, "
        f"connection configs: {connection_config}"
    )
def get_hash(self, extras: Optional[list[str]]) -> str:
    """Return a short, stable identifier for the index config plus ``extras``.

    The index config is serialized with sorted keys and ASCII-only output,
    matching the other pipeline steps, so the hash does not depend on the
    order in which ``to_dict()`` happens to emit its keys.

    Args:
        extras: Optional strings (for example a record identifier) appended
            to the serialized config before hashing; ``None`` or an empty
            list adds nothing.

    Returns:
        The first 12 hex characters of the SHA-256 digest.
    """
    hashable_string = json.dumps(
        self.process.index_config.to_dict(), sort_keys=True, ensure_ascii=True
    )
    if extras:
        hashable_string += "".join(extras)
    return hashlib.sha256(hashable_string.encode()).hexdigest()[:12]
def _run(self, path: str, file_data_path: str) -> PartitionStepResponse:
    """Partition one file, skipping the work when ``should_partition`` says no.

    Args:
        path: Location of the file to partition.
        file_data_path: Location of the serialized ``FileData`` for the file.

    Returns:
        A response carrying the unchanged ``file_data_path`` and the path of
        the (new or previously cached) partitioned output.
    """
    source_path = Path(path)
    file_data = FileData.from_file(path=file_data_path)
    output_filepath = self.get_output_filepath(filename=source_path)
    if self.should_partition(filepath=output_filepath, file_data=file_data):
        partitioned_content = self.process.run(filename=source_path, metadata=file_data.metadata)
        self._save_output(
            output_filepath=str(output_filepath), partitioned_content=partitioned_content
        )
    else:
        logger.debug(f"Skipping partitioning, output already exists: {output_filepath}")
    return PartitionStepResponse(file_data_path=file_data_path, path=str(output_filepath))
def get_hash(self, extras: Optional[list[str]]) -> str:
    """Return a 12-character hash of the partitioner config plus ``extras``.

    Args:
        extras: Optional strings concatenated (without separator) after the
            key-sorted JSON form of the config; ``None`` or empty adds nothing.

    Returns:
        The first 12 hex characters of the SHA-256 digest.
    """
    serialized_config = json.dumps(
        self.process.config.to_dict(), sort_keys=True, ensure_ascii=True
    )
    digest = hashlib.sha256(serialized_config.encode())
    if extras:
        # Feeding the extras separately hashes the same byte stream as
        # hashing the concatenated string.
        digest.update("".join(extras).encode())
    return digest.hexdigest()[:12]
def _run(self, path: str, file_data_path: str) -> list[UncompressStepResponse]:
    """Run the uncompress process and persist a FileData record per result.

    Args:
        path: Unused here; accepted because all steps share this signature.
        file_data_path: Location of the serialized ``FileData`` to process.

    Returns:
        One response per file data entry returned by the process. Each new
        record is written next to ``file_data_path`` as ``<identifier>.json``.
    """
    original_file_data = FileData.from_file(path=file_data_path)
    produced = self.process.run(file_data=original_file_data)
    records_dir = Path(file_data_path).parent
    responses = []
    for entry in produced:
        record_path = records_dir / f"{entry.identifier}.json"
        entry.to_file(path=str(record_path.resolve()))
        response = UncompressStepResponse(
            path=entry.source_identifiers.fullpath,
            file_data_path=str(record_path),
        )
        responses.append(response)
    return responses
class UploadStepContent(TypedDict):
    """Input record for ``UploadStep``: one entry per document to upload."""

    # Location of the content to upload; ``UploadStep`` wraps it in a ``Path``.
    path: str
    # Location of the file that ``FileData.from_file`` reads for this document.
    file_data_path: str
async def _run_async(self, path: str, file_data_path: str):
    """Upload a single document through the uploader's async entry point.

    Args:
        path: Location of the content to upload.
        file_data_path: Location of the serialized ``FileData`` for the
            document.

    When the pipeline context provides a semaphore, the upload runs while
    holding it so the number of concurrent uploads stays bounded.
    """
    upload_path = Path(path)
    file_data = FileData.from_file(path=file_data_path)
    if semaphore := self.context.semaphore:
        # The semaphore must be entered with ``async with``, as every other
        # step in this package does. asyncio synchronization primitives do
        # not implement the synchronous context-manager protocol, so a plain
        # ``with`` fails as soon as a semaphore is configured.
        async with semaphore:
            await self.process.run_async(path=upload_path, file_data=file_data)
    else:
        await self.process.run_async(path=upload_path, file_data=file_data)
def to_chunking_kwargs(self) -> dict[str, Any]:
    """Collect the chunking options of this config as keyword arguments.

    Returns:
        A dict mapping each chunking option name to its current value
        (including ``None`` for unset options). API-related settings such
        as the endpoint and API key are not part of the result.
    """
    option_names = (
        "chunking_strategy",
        "combine_text_under_n_chars",
        "max_characters",
        "include_orig_elements",
        "multipage_sections",
        "new_after_n_chars",
        "overlap",
        "overlap_all",
    )
    return {option: getattr(self, option) for option in option_names}
def run(self, elements_filepath: Path, **kwargs: Any) -> list[Element]:
    """Chunk the elements stored in ``elements_filepath`` locally.

    Args:
        elements_filepath: JSON file holding the elements to chunk.
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        The chunked elements with hash ids assigned, or the loaded elements
        unchanged (after a warning) when the configured strategy is not one
        of the locally supported ones.
    """
    elements = elements_from_json(filename=str(elements_filepath))
    local_chunking_strategies = ("basic", "by_title")
    if self.config.chunking_strategy in local_chunking_strategies:
        chunked_elements = dispatch.chunk(elements=elements, **self.config.to_chunking_kwargs())
        assign_and_map_hash_ids(chunked_elements)
        return chunked_elements

    # Unsupported (or unset) strategy: hand the elements back untouched.
    logger.warning(
        "chunking strategy not supported for local chunking: {}, must be one of: {}".format(
            self.config.chunking_strategy, ", ".join(local_chunking_strategies)
        )
    )
    return elements
# Maps a source connector type name to its registry entry.
source_registry: dict[str, SourceRegistryEntry] = {}


def add_source_entry(source_type: str, entry: SourceRegistryEntry):
    """Register ``entry`` as the source connector for ``source_type``.

    Raises:
        ValueError: If ``source_type`` is already present in the registry;
            the existing entry is left untouched.
    """
    already_registered = source_type in source_registry
    if not already_registered:
        source_registry[source_type] = entry
        return
    raise ValueError(f"source {source_type} has already been registered")
class Base:
    """Root class that gives ``super().__post_init__()`` calls a target.

    ``FileConfig.__post_init__`` calls ``super().__post_init__()``; this
    class provides the no-op implementation that call resolves to.
    """

    def __post_init__(self):
        """Do nothing."""
        return None
@dataclass
class FsspecIndexerConfig(FileConfig, IndexerConfig):
    """Indexing options for fsspec-backed sources.

    The ``@dataclass`` decorator is required here. Without it ``recursive``
    and ``file_glob`` are only class attributes and are never collected as
    dataclass fields, so neither this class nor decorated subclasses accept
    them as constructor arguments. ``FsspecUploaderConfig`` in this module
    follows the same decorated pattern.
    """

    # When true, the indexer walks directories recursively (``fs.find``)
    # instead of listing only the top level (``fs.ls``).
    recursive: bool = False
    # Optional glob patterns; a path is kept when it matches any of them.
    # ``None`` disables filtering.
    file_glob: Optional[list[str]] = None
def list_files(self) -> list[str]:
    """List the non-empty files under the configured remote path.

    Returns:
        File paths as reported by the filesystem. Entries whose size is 0,
        missing or ``None`` are skipped.

    Raises:
        TypeError: If the filesystem returns a listing of an unexpected type.
    """
    root = self.index_config.path_without_protocol
    if not self.index_config.recursive:
        # fs.ls does not walk directories
        # directories that are listed in cloud storage can cause problems
        # because they are seen as 0 byte files
        found = self.fs.ls(root, detail=True)
        if isinstance(found, list):
            # ``or 0`` keeps entries without a usable size from raising a
            # TypeError in the comparison; they are treated as empty.
            return [x.get("name") for x in found if (x.get("size") or 0) > 0]
        raise TypeError(f"unhandled response type from ls: {type(found)}")

    # fs.find will recursively walk directories
    # "size" is a common key for all the cloud protocols with fs
    found = self.fs.find(root, detail=True)
    if isinstance(found, dict):
        return [k for k, v in found.items() if (v.get("size") or 0) > 0]
    raise TypeError(f"unhandled response type from find: {type(found)}")
def get_download_path(self, file_data: FileData) -> Path:
    """Return the local path a remote file should be downloaded to.

    Args:
        file_data: Record whose ``source_identifiers.rel_path`` gives the
            file's path relative to the indexed remote location.

    Returns:
        ``download_dir / rel_path`` when a download config is set, otherwise
        the relative path on its own.
    """
    rel_path = Path(file_data.source_identifiers.rel_path)
    # The indexer derives rel_path by removing the remote prefix, which
    # leaves a leading "/" when the remote URL has no trailing slash.
    # Joining a rooted path would discard download_dir and write to the
    # filesystem root, so drop the anchor first.
    if rel_path.anchor:
        rel_path = rel_path.relative_to(rel_path.anchor)
    if self.download_config:
        return self.download_config.download_dir / rel_path
    return rel_path
def __post_init__(self):
    """Validate ``upload_config`` and create the filesystem client.

    Raises:
        TypeError: If no ``upload_config`` was supplied. The field only has
            a default because of dataclass field ordering; it is required.
    """
    # TODO once python3.9 no longer supported and kw_only is allowed in dataclasses, remove:
    if self.upload_config:
        from fsspec import get_filesystem_class

        # Without a connection config the filesystem is created with no
        # extra keyword arguments.
        access_kwargs = (
            self.connection_config.get_access_config() if self.connection_config else {}
        )
        filesystem_cls = get_filesystem_class(self.upload_config.protocol)
        self.fs: AbstractFileSystem = filesystem_cls(**access_kwargs)
        return

    raise TypeError(
        f"{self.__class__.__name__}.__init__() "
        f"missing 1 required positional argument: 'upload_config'"
    )
@dataclass
class S3AccessConfig(FsspecAccessConfig):
    """Optional credentials for the S3 connector.

    ``S3ConnectionConfig.get_access_config`` copies every truthy value of
    this config into the keyword arguments used to build the filesystem.
    Unset (``None``) values are left out.
    """

    # NOTE(review): presumably the AWS access key id — confirm against s3fs.
    key: Optional[str] = None
    # NOTE(review): presumably the AWS secret access key — confirm against s3fs.
    secret: Optional[str] = None
    # NOTE(review): presumably a session token for temporary credentials — confirm.
    token: Optional[str] = None
def get_metadata(self, path: str) -> DataSourceMetadata:
    """Build the source metadata for the S3 object at ``path``.

    The last-modified time is used for both the created and the modified
    date. The version is the object's ``ETag`` with surrounding double
    quotes removed, or ``None`` when the listing has no ``ETag``.

    Args:
        path: Object path as understood by the filesystem client.
    """
    date_created = None
    date_modified = None
    try:
        modified: Optional[datetime] = self.fs.modified(path)
        if modified:
            date_created = date_modified = str(modified.timestamp())
    except NotImplementedError:
        # Filesystem cannot report modification times; leave both unset.
        pass

    info: dict[str, Any] = self.fs.info(path)
    etag = info.get("ETag")
    version = str(etag).strip('"') if etag else None

    protocol = self.index_config.protocol
    return DataSourceMetadata(
        date_created=date_created,
        date_modified=date_modified,
        date_processed=str(time()),
        version=version,
        url=f"{protocol}://{path}",
        record_locator={
            "protocol": protocol,
            "remote_file_path": self.index_config.remote_url,
        },
    )
@dataclass
class S3Upload(FsspecUploader):
    """S3 uploader: the fsspec uploader guarded by the ``s3`` extra's dependencies."""

    connection_config: S3ConnectionConfig
    # No uploader config is supplied by default.
    upload_config: Optional[S3UploaderConfig] = None

    @requires_dependencies(["s3fs", "fsspec"], extras="s3")
    def __post_init__(self):
        super().__post_init__()

    @requires_dependencies(["s3fs", "fsspec"], extras="s3")
    def run(self, contents: list[UploadContent], **kwargs: Any) -> None:
        """Delegate the upload of ``contents`` to the fsspec uploader."""
        return super().run(contents=contents, **kwargs)

    @requires_dependencies(["s3fs", "fsspec"], extras="s3")
    async def run_async(self, path: Path, file_data: FileData, **kwargs: Any) -> None:
        """Delegate the async upload of one file to the fsspec uploader."""
        return await super().run_async(path=path, file_data=file_data, **kwargs)
def list_files(self) -> list[Path]:
    """Return the files selected by the index configuration.

    * If ``index_config.path`` is a file, that single path is returned. It is
      returned directly rather than run through ``glob`` so that names
      containing glob metacharacters (``[``, ``*``, ``?``) are still found.
    * Otherwise the directory is globbed (recursively when
      ``index_config.recursive`` is set) with every pattern in
      ``index_config.file_glob``, or ``"*"`` when no patterns are given.

    Only regular files are returned: directories matched by a pattern are
    dropped, and a file matched by several patterns is listed once, in
    first-match order.
    """
    input_path = self.index_config.path
    if input_path.is_file():
        return [input_path]

    glob_fn = input_path.rglob if self.index_config.recursive else input_path.glob
    patterns = self.index_config.file_glob or ["*"]
    matches = itertools.chain.from_iterable(glob_fn(pattern) for pattern in patterns)
    # dict.fromkeys de-duplicates while preserving discovery order.
    return [match for match in dict.fromkeys(matches) if match.is_file()]
@dataclass
class LocalUploader(Uploader):
    """Destination connector that copies processed files into a local directory."""

    upload_config: LocalUploaderConfig = field(default_factory=LocalUploaderConfig)

    def is_async(self) -> bool:
        return False

    def run(self, contents: list[UploadContent], **kwargs: Any) -> None:
        """Copy each item's file under the configured output directory.

        The destination mirrors the source's ``relative_path``; the source
        filename inside the final path component is swapped for
        ``<filename_stem>.json``.

        Args:
            contents: Items whose ``path`` is copied and whose
                ``file_data.source_identifiers`` determine the destination.
        """
        output_root = self.upload_config.output_path
        output_root.mkdir(parents=True, exist_ok=True)
        for content in contents:
            identifiers = content.file_data.source_identifiers
            new_path = output_root / identifiers.relative_path
            final_path = new_path
            if identifiers.filename:
                # Rewrite only the last path component. Replacing on the whole
                # path string would also mangle the output directory or any
                # parent folder whose name happens to contain the filename.
                json_name = new_path.name.replace(
                    identifiers.filename, f"{identifiers.filename_stem}.json"
                )
                final_path = new_path.with_name(json_name)
            final_path.parent.mkdir(parents=True, exist_ok=True)
            logger.debug(f"copying file from {content.path} to {final_path}")
            shutil.copy(src=str(content.path), dst=str(final_path))
@dataclass
class Embedder(BaseProcess, ABC):
    """Pipeline step that embeds the elements stored in a JSON file."""

    config: EmbedderConfig

    def is_async(self) -> bool:
        # huggingface is run locally rather than via an api call so don't run async
        return self.config.embedding_provider != "langchain-huggingface"

    def run(self, elements_filepath: Path, **kwargs: Any) -> list[Element]:
        """Load elements from ``elements_filepath`` and return them embedded."""
        # Build the encoder first so an unrecognized provider fails before
        # the file is read.
        encoder = self.config.get_embedder()
        loaded = elements_from_json(filename=str(elements_filepath))
        return encoder.embed_documents(elements=loaded)

    async def run_async(self, elements_filepath: Path, **kwargs: Any) -> list[Element]:
        """Async entry point; delegates directly to the synchronous ``run``."""
        return self.run(elements_filepath=elements_filepath, **kwargs)
100644 index 0000000000..4ab872b912 --- /dev/null +++ b/unstructured/ingest/v2/processes/partitioner.py @@ -0,0 +1,125 @@ +from abc import ABC +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Optional + +from unstructured.documents.elements import DataSourceMetadata +from unstructured.ingest.enhanced_dataclass import EnhancedDataClassJsonMixin +from unstructured.ingest.enhanced_dataclass.dataclasses import enhanced_field +from unstructured.ingest.v2.interfaces.process import BaseProcess +from unstructured.ingest.v2.logger import logger +from unstructured.staging.base import elements_to_dicts, flatten_dict + + +@dataclass +class PartitionerConfig(EnhancedDataClassJsonMixin): + strategy: str = "auto" + ocr_languages: Optional[list[str]] = None + encoding: Optional[str] = None + additional_partition_args: dict[str, Any] = field(default_factory=dict) + skip_infer_table_types: Optional[list[str]] = None + fields_include: list[str] = field( + default_factory=lambda: ["element_id", "text", "type", "metadata", "embeddings"], + ) + flatten_metadata: bool = False + metadata_exclude: list[str] = field(default_factory=list) + metadata_include: list[str] = field(default_factory=list) + partition_endpoint: Optional[str] = "https://api.unstructured.io/general/v0/general" + partition_by_api: bool = False + api_key: Optional[str] = enhanced_field(default=None, sensitive=True) + hi_res_model_name: Optional[str] = None + + def __post_init__(self): + if self.metadata_exclude and self.metadata_include: + raise ValueError( + "metadata_exclude and metadata_include are " + "mutually exclusive with each other. Cannot specify both." 
def postprocess(self, elements: list[dict]) -> list[dict]:
    """Apply the configured metadata and field filters to element dicts.

    Steps, per element:

    1. ``metadata_exclude``: drop the listed keys. A dotted entry
       (``"metadata.data_source.url"``) is resolved from the element root;
       a plain entry is removed from ``element["metadata"]``. An entry whose
       path does not exist is ignored.
    2. otherwise ``metadata_include``: keep only the listed metadata keys.
    3. keep only the top-level keys named in ``fields_include``.
    4. when ``flatten_metadata`` is set, replace ``metadata`` with its
       flattened key/value pairs.

    Args:
        elements: Element dictionaries; they are not modified.

    Returns:
        A new list of processed element dictionaries.
    """
    from copy import deepcopy

    config = self.config
    processed: list[dict] = []
    for source in elements:
        # Deep copy: the filters below pop keys from nested dicts, which
        # must not leak back into the caller's data.
        elem = deepcopy(source)
        metadata = elem.get("metadata")

        if config.metadata_exclude:
            for ex in config.metadata_exclude:
                if "." in ex:  # handle nested fields
                    *parents, leaf = ex.split(".")
                    target = elem
                    for key in parents:
                        # Stop descending as soon as the path breaks, so a
                        # same-named key at a shallower level is not removed.
                        target = target.get(key) if isinstance(target, dict) else None
                    if isinstance(target, dict):
                        target.pop(leaf, None)
                elif isinstance(metadata, dict):  # handle top-level fields
                    metadata.pop(ex, None)
        elif config.metadata_include and isinstance(metadata, dict):
            for key in list(metadata):
                if key not in config.metadata_include:
                    metadata.pop(key, None)

        elem = {k: v for k, v in elem.items() if k in config.fields_include}

        if config.flatten_metadata and "metadata" in elem:
            flattened = flatten_dict(
                elem.pop("metadata"), keys_to_omit=["data_source_record_locator"]
            )
            elem.update(flattened)

        # Collect the rebuilt dict; rebinding the loop variable alone would
        # leave the unfiltered copy in the result.
        processed.append(elem)
    return processed
async def run_async(
    self, filename: Path, metadata: Optional[DataSourceMetadata] = None, **kwargs
) -> list[dict]:
    """Partition ``filename`` through the Unstructured API client.

    Args:
        filename: Local file whose bytes are sent to the API.
        metadata: Optional data-source metadata; when given it is attached
            to every returned element under ``metadata["data_source"]``.

    Returns:
        The post-processed element dictionaries returned by the API
        (an empty result yields an empty list).
    """
    from unstructured_client import UnstructuredClient
    from unstructured_client.models.shared import Files, PartitionParameters

    client = UnstructuredClient(
        server_url=self.config.partition_endpoint, api_key_auth=self.config.api_key
    )
    partition_request = self.config.to_partition_kwargs()
    with open(filename, "rb") as f:
        files = Files(
            content=f.read(),
            file_name=str(filename.resolve()),
        )
    partition_request["files"] = files
    partition_params = PartitionParameters(**partition_request)
    # NOTE(review): this call is not awaited and looks synchronous, which
    # would block the event loop for the whole request — confirm.
    resp = client.general.partition(partition_params)
    elements = resp.elements or []
    # Append the data source metadata the auto partition does for you.
    # ``metadata`` is optional, so skip this when it was not supplied.
    if metadata is not None:
        for element in elements:
            # A fresh dict per element so elements never share one object.
            element["metadata"]["data_source"] = metadata.to_dict()
    return self.postprocess(elements=elements)
@dataclass
class Uncompressor(BaseProcess, ABC):
    """Pipeline step that expands a local archive into one ``FileData`` per file."""

    config: UncompressConfig = field(default_factory=UncompressConfig)

    def is_async(self) -> bool:
        return True

    def run(self, file_data: FileData, **kwargs: Any) -> list[FileData]:
        """Uncompress the file referenced by ``file_data`` if it is an archive.

        Args:
            file_data: Record whose ``source_identifiers.fullpath`` points at
                a local file.

        Returns:
            ``[file_data]`` unchanged when the suffix is not a known tar/zip
            extension; otherwise one independent copy of ``file_data`` per
            extracted file, with ``fullpath`` (and ``rel_path``, when the
            original had one) pointing at that file.
        """
        from copy import deepcopy

        local_filepath = Path(file_data.source_identifiers.fullpath)
        if local_filepath.suffix not in TAR_FILE_EXT + ZIP_FILE_EXT:
            return [file_data]
        new_path = uncompress_file(filename=str(local_filepath))
        extracted = [p for p in Path(new_path).rglob("*") if p.is_file()]
        responses = []
        for extracted_file in extracted:
            # Deep copy: a shallow copy would share one source_identifiers
            # object, so every result (and the input) would end up pointing
            # at the last extracted file.
            new_file_data = deepcopy(file_data)
            new_file_data.source_identifiers.fullpath = str(extracted_file)
            if new_file_data.source_identifiers.rel_path:
                new_file_data.source_identifiers.rel_path = str(extracted_file).replace(
                    str(local_filepath.parent), ""
                )[1:]
            responses.append(new_file_data)
        return responses

    async def run_async(self, file_data: FileData, **kwargs: Any) -> list[FileData]:
        """Async entry point; delegates directly to the synchronous ``run``."""
        return self.run(file_data=file_data, **kwargs)