From 5d14a2aea068a593add9bbd09815304923c80ce9 Mon Sep 17 00:00:00 2001 From: Christine Straub Date: Thu, 5 Oct 2023 22:16:11 -0700 Subject: [PATCH] feat: shrink bboxes by top left (#1633) Closes #1573. ### Summary - update `shrink_bbox()` to keep top left rather than center ### Evaluation Run the following command for this [PDF](https://utic-dev-tech-fixtures.s3.us-east-2.amazonaws.com/pastebin/patent-11723901-page2.pdf). ``` PYTHONPATH=. python examples/custom-layout-order/evaluate_xy_cut_sorting.py ``` --------- Co-authored-by: ryannikolaidis <1208590+ryannikolaidis@users.noreply.github.com> --- CHANGELOG.md | 1 + .../partition/pdf-image/test_pdf.py | 8 +- .../partition/utils/test_sorting.py | 10 +- .../azure/IRS-form-1987.pdf.json | 176 +++++------ .../azure/IRS-form-1987.png.json | 74 ++--- .../biomed-api/65/11/main.PMC6312790.pdf.json | 56 ++-- .../biomed-api/75/29/main.PMC6312793.pdf.json | 40 +-- .../biomed-api/65/11/main.PMC6312790.pdf.json | 42 +-- .../biomed-api/75/29/main.PMC6312793.pdf.json | 54 ++-- .../2023-Jan-economic-outlook.pdf.json | 288 +++++++++--------- .../small-pdf-set/Silent-Giant-(1).pdf.json | 100 +++--- .../recalibrating-risk-report.pdf.json | 216 ++++++------- .../2023-Jan-economic-outlook.pdf.json | 266 ++++++++-------- .../small-pdf-set/Silent-Giant-(1).pdf.json | 68 ++--- .../recalibrating-risk-report.pdf.json | 196 ++++++------ unstructured/partition/utils/sorting.py | 10 +- 16 files changed, 802 insertions(+), 803 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e2250111b1..98566c7786 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ### Enhancements +* **Align to top left when shrinking bounding boxes for `xy-cut` sorting:** Update `shrink_bbox()` to keep top left rather than center * **Add visualization script to annotate elements** This script is often used to analyze/visualize elements with coordinates (e.g. partition_pdf()).
* **Adds data source properties to the Jira connector** These properties (date_created, date_modified, version, source_url, record_locator) are written to element metadata during ingest, mapping elements to information about the document source from which they derive. This functionality enables downstream applications to reveal source document applications, e.g. a link to a GDrive doc, Salesforce record, etc. * **Improve title detection in pptx documents** The default title textboxes on a pptx slide are now categorized as titles. diff --git a/test_unstructured/partition/pdf-image/test_pdf.py b/test_unstructured/partition/pdf-image/test_pdf.py index 8ebc621300..e26516a7fd 100644 --- a/test_unstructured/partition/pdf-image/test_pdf.py +++ b/test_unstructured/partition/pdf-image/test_pdf.py @@ -200,9 +200,9 @@ def test_partition_pdf_with_auto_strategy( ): elements = pdf.partition_pdf(filename=filename, strategy="auto") title = "LayoutParser: A Unified Toolkit for Deep Learning Based Document Image Analysis" - assert elements[7].text == title - assert elements[7].metadata.filename == "layout-parser-paper-fast.pdf" - assert elements[7].metadata.file_directory == "example-docs" + assert elements[6].text == title + assert elements[6].metadata.filename == "layout-parser-paper-fast.pdf" + assert elements[6].metadata.file_directory == "example-docs" def test_partition_pdf_with_page_breaks( @@ -521,7 +521,7 @@ def test_partition_pdf_with_auto_strategy_exclude_metadata( include_metadata=False, ) title = "LayoutParser: A Unified Toolkit for Deep Learning Based Document Image Analysis" - assert elements[7].text == title + assert elements[6].text == title for i in range(len(elements)): assert elements[i].metadata.to_dict() == {} diff --git a/test_unstructured/partition/utils/test_sorting.py b/test_unstructured/partition/utils/test_sorting.py index 2000b4e3a3..d060232c5c 100644 --- a/test_unstructured/partition/utils/test_sorting.py +++ 
b/test_unstructured/partition/utils/test_sorting.py @@ -116,12 +116,12 @@ def test_coordinates_to_bbox(): def test_shrink_bbox(): - bbox = (0, 0, 100, 100) - shrink_factor = 0.5 - expected_result = (25, 25, 75, 75) + bbox = (0, 0, 200, 100) + shrink_factor = 0.9 + expected_result = (0, 0, 180, 90) assert shrink_bbox(bbox, shrink_factor) == expected_result - bbox = (0, 0, 200, 100) + bbox = (20, 20, 320, 120) shrink_factor = 0.9 - expected_result = (10, 5, 190, 95) + expected_result = (20, 20, 290, 110) assert shrink_bbox(bbox, shrink_factor) == expected_result diff --git a/test_unstructured_ingest/expected-structured-output/azure/IRS-form-1987.pdf.json b/test_unstructured_ingest/expected-structured-output/azure/IRS-form-1987.pdf.json index fbbd6fcea0..1299ac85a0 100644 --- a/test_unstructured_ingest/expected-structured-output/azure/IRS-form-1987.pdf.json +++ b/test_unstructured_ingest/expected-structured-output/azure/IRS-form-1987.pdf.json @@ -323,8 +323,8 @@ "text": "changes." }, { - "type": "UncategorizedText", - "element_id": "e53657178cb6855ac4b2029197a64b0c", + "type": "NarrativeText", + "element_id": "faf2673a7d6b6f7c5bf7cae6770a4130", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -339,11 +339,11 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "A." + "text": "Generally, applicants must complete Section In addition, complete the appropriate sections (B-1 through H) for which a change Is desired." }, { - "type": "NarrativeText", - "element_id": "faf2673a7d6b6f7c5bf7cae6770a4130", + "type": "UncategorizedText", + "element_id": "e53657178cb6855ac4b2029197a64b0c", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -358,7 +358,7 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "Generally, applicants must complete Section In addition, complete the appropriate sections (B-1 through H) for which a change Is desired." + "text": "A." 
}, { "type": "NarrativeText", @@ -456,8 +456,8 @@ "text": "Uniform capitalization rules and limitation on cash method.—If you are required to change your method of accounting under section,263A (relating to the capitalization and inclusion in inventory costs of certain expenses) or 448 (limiting the use of the cash method of accounting by certain taxpayers) as added by the Tax Reform Act of 1986 (“Act”), the change 1s treated as initiated by the taxpayer, approved by the Commissioner, and the period for taking the adjustments under section 481(a) into account will not exceed 4 years. (Hospitals required to change from the cash method under section 448 have 10 years to take the adjustrnents into account.) Complete Section A and the appropriate sections (B-1 or C and D) for which the change is required." }, { - "type": "Title", - "element_id": "5756fb398995bb6518a87637f24f426e", + "type": "NarrativeText", + "element_id": "550f9e99054c657264fb9bb26d3023de", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -472,11 +472,11 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "Time and Place for Filing" + "text": "Disregard the instructions under Time and Place for Filing and Late Applications. Instead, attach Form 3115 to your income tax return for the year of change; do not file it separately. Also include on a separate statement accompanying the Form 3115 the period over which the section 481(a) adjustment will be taken into account and the basis for that conclusion. Identify the automatic change being made at the top of page 1 of Form 3115 (e.g., “Automatic Change to Accrual Method—Section 448\"). See Temporary Regulations sections 1.263A-1T and 1.448-1T for additional information." 
}, { - "type": "NarrativeText", - "element_id": "af8bdf713f162b09567c8d1a3a2d4de7", + "type": "Title", + "element_id": "5756fb398995bb6518a87637f24f426e", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -491,11 +491,11 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "Generally, applicants must file this form within the first 180 days of the tax year in which it is desired to make the change." + "text": "Time and Place for Filing" }, { "type": "NarrativeText", - "element_id": "9dda11db48254f5e0d0000afb5d1dd9b", + "element_id": "af8bdf713f162b09567c8d1a3a2d4de7", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -510,11 +510,11 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "Taxpayers, other than exempt organizations, should file Form 3115 with the Commissioner of Internal Revenue, Attention: CC:C:4, 1111 Constitution Avenue, NW, Washington, DC 20224, Exempt organizations should file with the Assistant Commissioner (Employee Plans and Exempt Organizations), 1111 Constitution Avenue, NW, Washington, DC 20224." + "text": "Generally, applicants must file this form within the first 180 days of the tax year in which it is desired to make the change." }, { "type": "NarrativeText", - "element_id": "4d063cdbd131401fa29e1d0e824dc017", + "element_id": "9dda11db48254f5e0d0000afb5d1dd9b", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -529,11 +529,11 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "You should normally receive an acknowledgment of receipt of your application within 30 days. If you do not hear from IRS within 30 days of submitting your completed Form 3115, you may inquire as to the receipt of your application by writing to: Control Clerk, CC:C:4, Internal Revenue Service, Room 5040, 1111 Constitution Avenue, NW, Washington, DC 20224." 
+ "text": "Taxpayers, other than exempt organizations, should file Form 3115 with the Commissioner of Internal Revenue, Attention: CC:C:4, 1111 Constitution Avenue, NW, Washington, DC 20224, Exempt organizations should file with the Assistant Commissioner (Employee Plans and Exempt Organizations), 1111 Constitution Avenue, NW, Washington, DC 20224." }, { - "type": "Title", - "element_id": "ea325d761f98c6b73320e442b67f2a35", + "type": "NarrativeText", + "element_id": "4d063cdbd131401fa29e1d0e824dc017", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -548,7 +548,7 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "an" + "text": "You should normally receive an acknowledgment of receipt of your application within 30 days. If you do not hear from IRS within 30 days of submitting your completed Form 3115, you may inquire as to the receipt of your application by writing to: Control Clerk, CC:C:4, Internal Revenue Service, Room 5040, 1111 Constitution Avenue, NW, Washington, DC 20224." }, { "type": "NarrativeText", @@ -570,8 +570,8 @@ "text": "See section 5.03 of Rev. Proc. 84-74 for filing early application." }, { - "type": "NarrativeText", - "element_id": "12f877f0bd47f9b761ed7e74be1afacd", + "type": "Title", + "element_id": "ea325d761f98c6b73320e442b67f2a35", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -586,11 +586,11 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "Note: /f this form is being filed in accordance with Rev. Proc. 74-11, see Section G below." + "text": "an" }, { - "type": "Title", - "element_id": "a4316c02df07840f1beb56609cb09735", + "type": "NarrativeText", + "element_id": "12f877f0bd47f9b761ed7e74be1afacd", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -605,11 +605,11 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "Late Applications" + "text": "Note: /f this form is being filed in accordance with Rev. Proc. 
74-11, see Section G below." }, { - "type": "NarrativeText", - "element_id": "02dd043b5686a46b2f03cfe8cf56aae9", + "type": "Title", + "element_id": "a4316c02df07840f1beb56609cb09735", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -624,11 +624,11 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "If your application is filed after the 180-day period, it is late. The application will be considered for processing only upon a showing of “good cause” and if it can be shown to the satisfaction of the Commissioner that granting you an extension will not jeopardize the Government's interests. For further information, see Rev, Proc. 79-63." + "text": "Late Applications" }, { "type": "NarrativeText", - "element_id": "550f9e99054c657264fb9bb26d3023de", + "element_id": "02dd043b5686a46b2f03cfe8cf56aae9", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -643,7 +643,7 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "Disregard the instructions under Time and Place for Filing and Late Applications. Instead, attach Form 3115 to your income tax return for the year of change; do not file it separately. Also include on a separate statement accompanying the Form 3115 the period over which the section 481(a) adjustment will be taken into account and the basis for that conclusion. Identify the automatic change being made at the top of page 1 of Form 3115 (e.g., “Automatic Change to Accrual Method—Section 448\"). See Temporary Regulations sections 1.263A-1T and 1.448-1T for additional information." + "text": "If your application is filed after the 180-day period, it is late. The application will be considered for processing only upon a showing of “good cause” and if it can be shown to the satisfaction of the Commissioner that granting you an extension will not jeopardize the Government's interests. For further information, see Rev, Proc. 79-63." 
}, { "type": "Title", @@ -684,8 +684,8 @@ "text": "Individuals.—An individual should enter his or her social security number in this block. If the application is made on behalf of a husband and wife who file their income tax return jointly, enter the social security numbers of both." }, { - "type": "Title", - "element_id": "ea325d761f98c6b73320e442b67f2a35", + "type": "NarrativeText", + "element_id": "7d82c5876c5c1a3596338ae8cfbd1a50", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -700,11 +700,11 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "an" + "text": "Others.-—The employer identification number applicant other than an individual should be entered in this block." }, { - "type": "NarrativeText", - "element_id": "7d82c5876c5c1a3596338ae8cfbd1a50", + "type": "Title", + "element_id": "ea325d761f98c6b73320e442b67f2a35", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -719,7 +719,7 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "Others.-—The employer identification number applicant other than an individual should be entered in this block." 
+ "text": "an" }, { "type": "Title", @@ -1083,8 +1083,8 @@ "text": "Item 11, page 2.—If you cannot provide the requested information, you may sign a statement under penalties of perjury that:" }, { - "type": "Title", - "element_id": "28391d3bc64ec15cbb090426b04aa6b7", + "type": "NarrativeText", + "element_id": "81f087b1fcf4c9870324336c6bc0de78", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -1099,11 +1099,11 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "of" + "text": "(1) Gives your best estimate of the percentage the section 481(a) adjustment that would have been required if the requested change had been made for each of the 3 preceding years; and" }, { - "type": "NarrativeText", - "element_id": "81f087b1fcf4c9870324336c6bc0de78", + "type": "Title", + "element_id": "28391d3bc64ec15cbb090426b04aa6b7", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -1118,11 +1118,11 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "(1) Gives your best estimate of the percentage the section 481(a) adjustment that would have been required if the requested change had been made for each of the 3 preceding years; and" + "text": "of" }, { - "type": "Title", - "element_id": "b9776d7ddf459c9ad5b0e1d6ac61e27b", + "type": "NarrativeText", + "element_id": "cde0777402fde810d0fb24b15df92b2b", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -1137,11 +1137,11 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "the" + "text": "(2) Explains in detail why you cannot provide requested information." 
}, { - "type": "NarrativeText", - "element_id": "cde0777402fde810d0fb24b15df92b2b", + "type": "Title", + "element_id": "b9776d7ddf459c9ad5b0e1d6ac61e27b", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -1156,7 +1156,7 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "(2) Explains in detail why you cannot provide requested information." + "text": "the" }, { "type": "NarrativeText", @@ -1178,8 +1178,8 @@ "text": "See section 5.06(2) of Rev. Proc. 84-74 for required perjury statement that must be attached." }, { - "type": "Title", - "element_id": "b9776d7ddf459c9ad5b0e1d6ac61e27b", + "type": "NarrativeText", + "element_id": "1734a701c8a3139ddcb5b857f697318f", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -1194,7 +1194,7 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "the" + "text": "If IRS later examines your return for the year change or for later years, it has the right to verify your statement at that time." }, { "type": "Title", @@ -1217,7 +1217,7 @@ }, { "type": "NarrativeText", - "element_id": "1734a701c8a3139ddcb5b857f697318f", + "element_id": "751abc8c6a0fa412c3e8c18345f57f95", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -1232,11 +1232,11 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "If IRS later examines your return for the year change or for later years, it has the right to verify your statement at that time." + "text": "Item 13, page 2.—Insert the actual number of tax years. Use of the term “since inception” 1s not acceptable. However, “more than 6 years” Is acceptable." 
}, { "type": "Title", - "element_id": "28391d3bc64ec15cbb090426b04aa6b7", + "element_id": "136a59b0c53731bc299206fda46e0888", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -1251,11 +1251,11 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "of" + "text": "Section B-1" }, { "type": "NarrativeText", - "element_id": "751abc8c6a0fa412c3e8c18345f57f95", + "element_id": "e4a695ea83818204438fe08add6d1554", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -1270,11 +1270,11 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "Item 13, page 2.—Insert the actual number of tax years. Use of the term “since inception” 1s not acceptable. However, “more than 6 years” Is acceptable." + "text": "Item 1b, page 2.—Include any amounts reported as income ina prior year although the income had not been accrued (earned) or received in the prior year; for example, discount on installment loans reported as income for the year in which the loans were made instead of for the year or years in which the income was received or earned. Advance payments under Rev. Proc. 71-21 or Regulations section 1.451-5 must be fully explained and all pertinent information must be submitted with this application." 
}, { "type": "Title", - "element_id": "136a59b0c53731bc299206fda46e0888", + "element_id": "f63f53aab435b8c9789ab7d6b982db3f", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -1289,11 +1289,11 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "Section B-1" + "text": "Sections B-2 and B-3" }, { "type": "NarrativeText", - "element_id": "e4a695ea83818204438fe08add6d1554", + "element_id": "eac562ca19f6198691856c695e2790bd", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -1308,11 +1308,11 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "Item 1b, page 2.—Include any amounts reported as income ina prior year although the income had not been accrued (earned) or received in the prior year; for example, discount on installment loans reported as income for the year in which the loans were made instead of for the year or years in which the income was received or earned. Advance payments under Rev. Proc. 71-21 or Regulations section 1.451-5 must be fully explained and all pertinent information must be submitted with this application." + "text": "Limitation on the Use of the Cash Method of Accounting. —Except as provided below, C corporations, partnerships with a C corporation as a partner, and tax shelters may not use the cash method of accounting. For purposes of this limitation, a trust subject to the tax on unrelated business income under section 511 1s treated as aC corporation with respect to its unrelated trade or business activities." 
}, { - "type": "Title", - "element_id": "f63f53aab435b8c9789ab7d6b982db3f", + "type": "NarrativeText", + "element_id": "e5bed7fe04dd22cabe5e5c0362d37743", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -1327,11 +1327,11 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "Sections B-2 and B-3" + "text": "The limitation on the use of the cash method (except for tax shelters) does not apply to—" }, { "type": "NarrativeText", - "element_id": "eac562ca19f6198691856c695e2790bd", + "element_id": "69bd87b2ad5873c030748e62adf61b89", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -1346,11 +1346,11 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "Limitation on the Use of the Cash Method of Accounting. —Except as provided below, C corporations, partnerships with a C corporation as a partner, and tax shelters may not use the cash method of accounting. For purposes of this limitation, a trust subject to the tax on unrelated business income under section 511 1s treated as aC corporation with respect to its unrelated trade or business activities." + "text": "(1) Farming businesses.—F or this purpose, the term “farming business” 1s defined in section 263A(e)(4), but it also includes the raising, harvesting, or growing of trees to which section 263A(c)(5) applies. Notwithstanding this exception, section 447 requires certain C corporations and partnerships with a C corporation as a partner to use the accrual method." }, { "type": "NarrativeText", - "element_id": "e5bed7fe04dd22cabe5e5c0362d37743", + "element_id": "df67e4b3a4a1352209c2648b87d675e2", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -1365,11 +1365,11 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "The limitation on the use of the cash method (except for tax shelters) does not apply to—" + "text": "(2) Qualified personal service corporations. 
— A “qualified personal service corporation” is any corporation: (a) substantially all of the activities of which involve the performance of services in the fields of health, law, engineering, architecture, accounting, actuarial science, performing arts, or consulting, and (b)" }, { - "type": "NarrativeText", - "element_id": "69bd87b2ad5873c030748e62adf61b89", + "type": "Title", + "element_id": "794f7062cf3f56f2c7d70702bd3d13e1", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -1384,11 +1384,11 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "(1) Farming businesses.—F or this purpose, the term “farming business” 1s defined in section 263A(e)(4), but it also includes the raising, harvesting, or growing of trees to which section 263A(c)(5) applies. Notwithstanding this exception, section 447 requires certain C corporations and partnerships with a C corporation as a partner to use the accrual method." + "text": "Page 2" }, { - "type": "NarrativeText", - "element_id": "df67e4b3a4a1352209c2648b87d675e2", + "type": "Title", + "element_id": "b9776d7ddf459c9ad5b0e1d6ac61e27b", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -1403,11 +1403,11 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "(2) Qualified personal service corporations. 
— A “qualified personal service corporation” is any corporation: (a) substantially all of the activities of which involve the performance of services in the fields of health, law, engineering, architecture, accounting, actuarial science, performing arts, or consulting, and (b)" + "text": "the" }, { "type": "Title", - "element_id": "794f7062cf3f56f2c7d70702bd3d13e1", + "element_id": "28391d3bc64ec15cbb090426b04aa6b7", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -1422,7 +1422,7 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "Page 2" + "text": "of" }, { "type": "NarrativeText", @@ -1539,8 +1539,8 @@ "text": "Inventories of retail merchants.—The retail method of pricing inventories does not contemplate valuation of goods at the retail selling price. The retail selling price of goods on hand must be reduced to approximate cost or cost or market, whichever Is lower, by the adjustments required in Regulations section 1.471-8." }, { - "type": "Title", - "element_id": "1e3abf61a37e3cad36b11b459b1cc39e", + "type": "NarrativeText", + "element_id": "bbd0f86d34b7622cfff546da0c15584d", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -1555,11 +1555,11 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "If" + "text": "LIFO inventory changes.—Attach a schedule with all the required computations when changing the method of figuring LIFO inventories. 
you are changing from LIFO to a non-LIFO method, attach a schedule with the following additional information:" }, { - "type": "NarrativeText", - "element_id": "bbd0f86d34b7622cfff546da0c15584d", + "type": "Title", + "element_id": "1e3abf61a37e3cad36b11b459b1cc39e", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -1574,7 +1574,7 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "LIFO inventory changes.—Attach a schedule with all the required computations when changing the method of figuring LIFO inventories. you are changing from LIFO to a non-LIFO method, attach a schedule with the following additional information:" + "text": "If" }, { "type": "NarrativeText", @@ -1653,8 +1653,8 @@ "text": "(2) proposed and valuation methods conform to the inventory method currently used with respect to non-LIFO Inventories, if any, or how such method is otherwise consistent with Regulations section 1.4726." }, { - "type": "Title", - "element_id": "a7e2d26e8d15814dd9c6a1bdc90585c8", + "type": "NarrativeText", + "element_id": "4a9430201a20b0868ab81c8c9e71b881", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -1669,11 +1669,11 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "by" + "text": "(3) The termination event statement required section 5.10 of Rev. Proc. 84-74 and an explanation if there has been a termination event." }, { - "type": "NarrativeText", - "element_id": "4a9430201a20b0868ab81c8c9e71b881", + "type": "Title", + "element_id": "a7e2d26e8d15814dd9c6a1bdc90585c8", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -1688,7 +1688,7 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "(3) The termination event statement required section 5.10 of Rev. Proc. 84-74 and an explanation if there has been a termination event." 
+ "text": "by" }, { "type": "Title", diff --git a/test_unstructured_ingest/expected-structured-output/azure/IRS-form-1987.png.json b/test_unstructured_ingest/expected-structured-output/azure/IRS-form-1987.png.json index 0ec3b4cce8..e96add9c2f 100644 --- a/test_unstructured_ingest/expected-structured-output/azure/IRS-form-1987.png.json +++ b/test_unstructured_ingest/expected-structured-output/azure/IRS-form-1987.png.json @@ -1,23 +1,4 @@ [ - { - "type": "UncategorizedText", - "element_id": "e16bce609163ec96985ae522ca81502a", - "metadata": { - "data_source": { - "url": "abfs://container1/IRS-form-1987.png", - "version": 328871203465633719836776597535876541325, - "record_locator": { - "protocol": "abfs", - "remote_file_path": "container1/IRS-form-1987.png" - }, - "date_created": "2023-03-10T09:44:55+00:00", - "date_modified": "2023-03-10T09:44:55+00:00" - }, - "filetype": "image/png", - "page_number": 1 - }, - "text": "‘A." - }, { "type": "Title", "element_id": "92405c82f76df8b2cbbc6047bd10e0ff", @@ -341,6 +322,25 @@ }, "text": "Generally, applicants must complete Section In addition, complete the appropriate sections (B:1 through H) for which a change is desired." }, + { + "type": "UncategorizedText", + "element_id": "e16bce609163ec96985ae522ca81502a", + "metadata": { + "data_source": { + "url": "abfs://container1/IRS-form-1987.png", + "version": 328871203465633719836776597535876541325, + "record_locator": { + "protocol": "abfs", + "remote_file_path": "container1/IRS-form-1987.png" + }, + "date_created": "2023-03-10T09:44:55+00:00", + "date_modified": "2023-03-10T09:44:55+00:00" + }, + "filetype": "image/png", + "page_number": 1 + }, + "text": "‘A." + }, { "type": "NarrativeText", "element_id": "bf2a070cb9d03d056e70b26bebf1ef79", @@ -513,8 +513,8 @@ "text": "You should normally receive an acknowledgment of receipt of your application within 30 days. 
If you do not hear from IRS within 30 days of submitting your completed Form 3115, you may inquire as to the receipt of your application by writing to: Control Clerk, CC:C:4, Internal Revenue Service, Room 5040, 1111 Constitution Avenue, NW, Washington, DC 20224." }, { - "type": "Title", - "element_id": "ea325d761f98c6b73320e442b67f2a35", + "type": "NarrativeText", + "element_id": "e3e2ccf4f0d1524d4f5ce42e8f2d1efa", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.png", @@ -529,11 +529,11 @@ "filetype": "image/png", "page_number": 1 }, - "text": "an" + "text": "See section 5.03 of Rev. Proc. 84-74 for filing early application," }, { - "type": "NarrativeText", - "element_id": "e3e2ccf4f0d1524d4f5ce42e8f2d1efa", + "type": "Title", + "element_id": "ea325d761f98c6b73320e442b67f2a35", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.png", @@ -548,7 +548,7 @@ "filetype": "image/png", "page_number": 1 }, - "text": "See section 5.03 of Rev. Proc. 84-74 for filing early application," + "text": "an" }, { "type": "NarrativeText", @@ -646,8 +646,8 @@ "text": "Individuals. —An individual should enter his or her social security number in this block. If the application is made on behalf of a husband and wife who file their income tax return jointly, enter the social security numbers of both." 
}, { - "type": "Title", - "element_id": "ea325d761f98c6b73320e442b67f2a35", + "type": "NarrativeText", + "element_id": "e72d9c8a779a47796c4362b7885aa80b", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.png", @@ -662,11 +662,11 @@ "filetype": "image/png", "page_number": 1 }, - "text": "an" + "text": "Others.-—The employer identification number applicant other than an individual should be entered in this block," }, { - "type": "NarrativeText", - "element_id": "e72d9c8a779a47796c4362b7885aa80b", + "type": "Title", + "element_id": "ea325d761f98c6b73320e442b67f2a35", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.png", @@ -681,7 +681,7 @@ "filetype": "image/png", "page_number": 1 }, - "text": "Others.-—The employer identification number applicant other than an individual should be entered in this block," + "text": "an" }, { "type": "Title", @@ -855,8 +855,8 @@ "text": "Preparer other than partner, officer, etc.—The signature of the individual preparing the application should appear in the space provided on page 6." }, { - "type": "Title", - "element_id": "ca978112ca1bbdcafac231b39a23dc4d", + "type": "NarrativeText", + "element_id": "8200352b4e91b1be4f14e9248d50380a", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.png", @@ -871,11 +871,11 @@ "filetype": "image/png", "page_number": 1 }, - "text": "a" + "text": "Ifthe individual or firm is also authorized to represent the applicant before the IRS, receive copy of the requested ruling, or perform any other act(s), the power of attorney must reflect such authorization(s)." 
}, { - "type": "NarrativeText", - "element_id": "8200352b4e91b1be4f14e9248d50380a", + "type": "Title", + "element_id": "ca978112ca1bbdcafac231b39a23dc4d", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.png", @@ -890,7 +890,7 @@ "filetype": "image/png", "page_number": 1 }, - "text": "Ifthe individual or firm is also authorized to represent the applicant before the IRS, receive copy of the requested ruling, or perform any other act(s), the power of attorney must reflect such authorization(s)." + "text": "a" }, { "type": "Title", diff --git a/test_unstructured_ingest/expected-structured-output/biomed-api/65/11/main.PMC6312790.pdf.json b/test_unstructured_ingest/expected-structured-output/biomed-api/65/11/main.PMC6312790.pdf.json index 99cb9d99b7..886753dd8d 100644 --- a/test_unstructured_ingest/expected-structured-output/biomed-api/65/11/main.PMC6312790.pdf.json +++ b/test_unstructured_ingest/expected-structured-output/biomed-api/65/11/main.PMC6312790.pdf.json @@ -250,24 +250,24 @@ "text": "O. Sanni, A.P.I. Popoola / Data in Brief 22 (2019) 451–457" }, { - "type": "NarrativeText", - "element_id": "6928b78d26af54b6acb804ed319b5c05", + "type": "Table", + "element_id": "5eb814dac721c11581f011fbca57a17e", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 2 }, - "text": "How data were acquired" + "text": "How data were acquired Data format Experimental factors Experimental features Data source location Accessibility Related research article The cleaned and weighed specimen was suspended in beakers con- taining 0.5 M H2SO, solution of different concentrations of egg shell powder. The pre-weighed stainless steel samples were retrieved from the test solutions after every 24h, cleaned appropriately, dried and reweighed. Raw, analyzed The difference between the weight at a given time and the initial weight of the specimen was taken as the weight loss, which was used to calculate the corrosion rate and inhibition efficiency. 
Inhibitor concentration, exposure time Department of Chemical, Metallurgical and Materials Engineering, Tshwane University of Technology, Pretoria, South Africa Data are available within this article O. Sanni, A. P. I. Popoola, and O. S. I. Fayomi, Enhanced corrosion resistance of stainless steel type 316 in sulphuric acid solution using eco-friendly waste product, Results in Physics, 9 (2018) 225-230." }, { - "type": "Table", - "element_id": "5eb814dac721c11581f011fbca57a17e", + "type": "NarrativeText", + "element_id": "6928b78d26af54b6acb804ed319b5c05", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 2 }, - "text": "How data were acquired Data format Experimental factors Experimental features Data source location Accessibility Related research article The cleaned and weighed specimen was suspended in beakers con- taining 0.5 M H2SO, solution of different concentrations of egg shell powder. The pre-weighed stainless steel samples were retrieved from the test solutions after every 24h, cleaned appropriately, dried and reweighed. Raw, analyzed The difference between the weight at a given time and the initial weight of the specimen was taken as the weight loss, which was used to calculate the corrosion rate and inhibition efficiency. Inhibitor concentration, exposure time Department of Chemical, Metallurgical and Materials Engineering, Tshwane University of Technology, Pretoria, South Africa Data are available within this article O. Sanni, A. P. I. Popoola, and O. S. I. Fayomi, Enhanced corrosion resistance of stainless steel type 316 in sulphuric acid solution using eco-friendly waste product, Results in Physics, 9 (2018) 225-230." + "text": "How data were acquired" }, { "type": "NarrativeText", @@ -419,6 +419,16 @@ }, "text": "O. Sanni, A.P.I. 
Popoola / Data in Brief 22 (2019) 451–457" }, + { + "type": "Image", + "element_id": "84d160dc9075c76de6f6d6c3f2651fe3", + "metadata": { + "data_source": {}, + "filetype": "application/pdf", + "page_number": 3 + }, + "text": " Corrosion rate (mm/year) 24 48 72 96 120 144 168 192 Exposure time" + }, { "type": "NarrativeText", "element_id": "4f0139b605dfdd9eb93e920a6115e1b5", @@ -449,16 +459,6 @@ }, "text": "i" }, - { - "type": "Image", - "element_id": "84d160dc9075c76de6f6d6c3f2651fe3", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 3 - }, - "text": " Corrosion rate (mm/year) 24 48 72 96 120 144 168 192 Exposure time" - }, { "type": "Title", "element_id": "239bb77f5ec344ce5e614979b8c49742", @@ -621,43 +621,43 @@ }, { "type": "Title", - "element_id": "bcf00b4904f5661d6baef52e7e09e9b1", + "element_id": "362d4a20958df0c88550b9e5d1f2ef5b", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 4 }, - "text": "bc (V/dec)" + "text": "Inhibitor concentration (g)" }, { "type": "Title", - "element_id": "12e486f4a9b3a1805bf7e95b5d01847b", + "element_id": "bcf00b4904f5661d6baef52e7e09e9b1", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 4 }, - "text": "ba (V/dec)" + "text": "bc (V/dec)" }, { "type": "Title", - "element_id": "7bc31ed7ab5a625735657499f636c1f2", + "element_id": "12e486f4a9b3a1805bf7e95b5d01847b", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 4 }, - "text": "Ecorr (V)" + "text": "ba (V/dec)" }, { "type": "Title", - "element_id": "362d4a20958df0c88550b9e5d1f2ef5b", + "element_id": "7bc31ed7ab5a625735657499f636c1f2", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 4 }, - "text": "Inhibitor concentration (g)" + "text": "Ecorr (V)" }, { "type": "Title", @@ -971,23 +971,23 @@ }, { "type": "UncategorizedText", - "element_id": "33a2b57b388470db1cb13defbe73dc18", + "element_id": "825c6ae49ec498c873be5355109ca093", 
"metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 6 }, - "text": "(cid:3)" + "text": "(cid:1) Þ ¼ 87:6W DAT" }, { "type": "UncategorizedText", - "element_id": "825c6ae49ec498c873be5355109ca093", + "element_id": "33a2b57b388470db1cb13defbe73dc18", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 6 }, - "text": "(cid:1) Þ ¼ 87:6W DAT" + "text": "(cid:3)" }, { "type": "NarrativeText", diff --git a/test_unstructured_ingest/expected-structured-output/biomed-api/75/29/main.PMC6312793.pdf.json b/test_unstructured_ingest/expected-structured-output/biomed-api/75/29/main.PMC6312793.pdf.json index 884843e275..1ff53aa1cb 100644 --- a/test_unstructured_ingest/expected-structured-output/biomed-api/75/29/main.PMC6312793.pdf.json +++ b/test_unstructured_ingest/expected-structured-output/biomed-api/75/29/main.PMC6312793.pdf.json @@ -200,24 +200,24 @@ "text": "Specifications table" }, { - "type": "NarrativeText", - "element_id": "5c3978ebc42ea4f11240c221ac3be1cf", + "type": "Table", + "element_id": "765958cb90f3061bda61fe2f973b2acb", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 2 }, - "text": "Subject area Operations research More specific subject area Vehicle scheduling Type of data How data were acquired" + "text": "Subject area Operations research More specific subject area Vehicle scheduling Type of data Tables, text files How data were acquired Artificially generated by a C++ program on Intel\" Xeon” CPU E5- 2670 v2 with Linux operating system. Data format Raw Experimental factors Sixty randomly generated instances of the MDVSP with the number of depots in (8, 12, 16) and the number of trips in (1500, 2000, 2500, 3000) Experimental features Randomly generated instances Data source location IITB-Monash Research Academy, IIT Bombay, Powai, Mumbai, India. 
Data accessibility Data can be downloaded from https://orlib.uqcloud.net/ Related research article Kulkarni, S., Krishnamoorthy, M., Ranade, A., Ernst, A.T. and Patil, R., 2018. A new formulation and a column generation-based heuristic for the multiple depot vehicle scheduling problem. Transportation Research Part B: Methodological, 118, pp. 457-487 [3]." }, { - "type": "Table", - "element_id": "765958cb90f3061bda61fe2f973b2acb", + "type": "NarrativeText", + "element_id": "5c3978ebc42ea4f11240c221ac3be1cf", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 2 }, - "text": "Subject area Operations research More specific subject area Vehicle scheduling Type of data Tables, text files How data were acquired Artificially generated by a C++ program on Intel\" Xeon” CPU E5- 2670 v2 with Linux operating system. Data format Raw Experimental factors Sixty randomly generated instances of the MDVSP with the number of depots in (8, 12, 16) and the number of trips in (1500, 2000, 2500, 3000) Experimental features Randomly generated instances Data source location IITB-Monash Research Academy, IIT Bombay, Powai, Mumbai, India. Data accessibility Data can be downloaded from https://orlib.uqcloud.net/ Related research article Kulkarni, S., Krishnamoorthy, M., Ranade, A., Ernst, A.T. and Patil, R., 2018. A new formulation and a column generation-based heuristic for the multiple depot vehicle scheduling problem. Transportation Research Part B: Methodological, 118, pp. 457-487 [3]." + "text": "Subject area Operations research More specific subject area Vehicle scheduling Type of data How data were acquired" }, { "type": "NarrativeText", @@ -500,34 +500,34 @@ "text": "Table 1 Average number of locations, times, vehicles and empty travels for each instance size." 
}, { - "type": "Title", - "element_id": "0580daab1f34babd90ca1aaa345984f1", + "type": "Table", + "element_id": "1d8fd023cd0978f7a6500815d2ad0ef6", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 3 }, - "text": "Instance size (m, n)" + "text": "Instance size (m, n) Average number of Locations Times Vehicles Possible empty travels (8, 1500) 568.40 975.20 652.20 668,279.40 (8, 2000) 672.80 1048.00 857.20 1,195,844.80 (8, 2500) 923.40 1078.00 1082.40 1,866,175.20 (8, 3000) 977.00 1113.20 1272.80 2,705,617.00 (12, 1500) 566.00 994.00 642.00 674,191.00 (12, 2000) 732.60 1040.60 861.20 1,199,659.80 (12, 2500) 875.00 1081.00 1096.00 1,878,745.20 (12, 3000) 1119.60 1107.40 1286.20 2,711,180.40 (16, 1500) 581.80 985.40 667.80 673,585.80 (16, 2000) 778.00 1040.60 872.40 1,200,560.80 (16, 2500) 879.00 1083.20 1076.40 1,879,387.00 ) (16, 3000 1087.20 1101.60 1284.60 2,684,983.60" }, { "type": "Title", - "element_id": "47a68d3aa70030f2e7886e3f1cb07c69", + "element_id": "0580daab1f34babd90ca1aaa345984f1", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 3 }, - "text": "Average number of" + "text": "Instance size (m, n)" }, { - "type": "Table", - "element_id": "1d8fd023cd0978f7a6500815d2ad0ef6", + "type": "Title", + "element_id": "47a68d3aa70030f2e7886e3f1cb07c69", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 3 }, - "text": "Instance size (m, n) Average number of Locations Times Vehicles Possible empty travels (8, 1500) 568.40 975.20 652.20 668,279.40 (8, 2000) 672.80 1048.00 857.20 1,195,844.80 (8, 2500) 923.40 1078.00 1082.40 1,866,175.20 (8, 3000) 977.00 1113.20 1272.80 2,705,617.00 (12, 1500) 566.00 994.00 642.00 674,191.00 (12, 2000) 732.60 1040.60 861.20 1,199,659.80 (12, 2500) 875.00 1081.00 1096.00 1,878,745.20 (12, 3000) 1119.60 1107.40 1286.20 2,711,180.40 (16, 1500) 581.80 985.40 667.80 673,585.80 (16, 2000) 778.00 1040.60 872.40 1,200,560.80 (16, 2500) 879.00 1083.20 1076.40 
1,879,387.00 ) (16, 3000 1087.20 1101.60 1284.60 2,684,983.60" + "text": "Average number of" }, { "type": "Title", @@ -661,33 +661,33 @@ }, { "type": "Title", - "element_id": "526e0087cc3f254d9f86f6c7d8e23d95", + "element_id": "151e509ce97fe40eecae3822c78adcf5", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 4 }, - "text": "Description" + "text": "Number of lines" }, { "type": "Title", - "element_id": "151e509ce97fe40eecae3822c78adcf5", + "element_id": "0d42fdb9458af19413eee0a1227f415c", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 4 }, - "text": "Number of lines" + "text": "Number of columns in each line" }, { "type": "Title", - "element_id": "0d42fdb9458af19413eee0a1227f415c", + "element_id": "526e0087cc3f254d9f86f6c7d8e23d95", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 4 }, - "text": "Number of columns in each line" + "text": "Description" }, { "type": "UncategorizedText", diff --git a/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/biomed-api/65/11/main.PMC6312790.pdf.json b/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/biomed-api/65/11/main.PMC6312790.pdf.json index e4b5a5905c..0d26e3b810 100644 --- a/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/biomed-api/65/11/main.PMC6312790.pdf.json +++ b/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/biomed-api/65/11/main.PMC6312790.pdf.json @@ -183,26 +183,26 @@ "text": "Specification table" }, { - "type": "Title", - "element_id": "b27e559f6c00d2bde61efba5db252e31", + "type": "UncategorizedText", + "element_id": "7379bc18109a0a988413877dd9950c36", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 1, "links": [] }, - "text": "Materials engineering" + "text": "Subject area More specific subject area Surface science and engineering Type of data" }, { - "type": "UncategorizedText", - "element_id": 
"7379bc18109a0a988413877dd9950c36", + "type": "Title", + "element_id": "b27e559f6c00d2bde61efba5db252e31", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 1, "links": [] }, - "text": "Subject area More specific subject area Surface science and engineering Type of data" + "text": "Materials engineering" }, { "type": "Title", @@ -424,17 +424,6 @@ }, "text": "The results of the experiment are presented in this session. The results obtained from weight loss method for stainless steel Type 316 immersed in 0.5 M H2SO4 solution in the absence and presence of different concentrations of egg shell powder (ES) are presented in Figs. 1–3 respectively. It can be seen clearly from these Figures that the efficiency of egg shell powder increase with the inhibitor con- centration, The increase in its efficiency could be as a result of increase in the constituent molecule" }, - { - "type": "UncategorizedText", - "element_id": "624b60c58c9d8bfb6ff1886c2fd605d2", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 2, - "links": [] - }, - "text": "30" - }, { "type": "Title", "element_id": "e28e0dc941accc8694040c63091b580c", @@ -501,6 +490,17 @@ }, "text": "i" }, + { + "type": "UncategorizedText", + "element_id": "624b60c58c9d8bfb6ff1886c2fd605d2", + "metadata": { + "data_source": {}, + "filetype": "application/pdf", + "page_number": 2, + "links": [] + }, + "text": "30" + }, { "type": "UncategorizedText", "element_id": "f5ca38f748a1d6eaf726b8a42fb575c3", @@ -1691,25 +1691,25 @@ }, { "type": "UncategorizedText", - "element_id": "33a2b57b388470db1cb13defbe73dc18", + "element_id": "825c6ae49ec498c873be5355109ca093", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 6, "links": [] }, - "text": "(cid:3)" + "text": "(cid:1) Þ ¼ 87:6W DAT" }, { "type": "UncategorizedText", - "element_id": "825c6ae49ec498c873be5355109ca093", + "element_id": "33a2b57b388470db1cb13defbe73dc18", "metadata": { 
"data_source": {}, "filetype": "application/pdf", "page_number": 6, "links": [] }, - "text": "(cid:1) Þ ¼ 87:6W DAT" + "text": "(cid:3)" }, { "type": "Title", diff --git a/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/biomed-api/75/29/main.PMC6312793.pdf.json b/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/biomed-api/75/29/main.PMC6312793.pdf.json index 22f0e197ea..eb8b7ab3a5 100644 --- a/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/biomed-api/75/29/main.PMC6312793.pdf.json +++ b/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/biomed-api/75/29/main.PMC6312793.pdf.json @@ -596,48 +596,48 @@ "text": "i þδ" }, { - "type": "Title", - "element_id": "3feb623147ddb3265b5968ce2efb8f6b", + "type": "NarrativeText", + "element_id": "43dad32a26a446c5a2c74f3f2328b849", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 3, "links": [] }, - "text": "Z te" + "text": ". If le i ls le i j , otherwise, the vehicle may require waiting at le i for the duration of ðts" }, { "type": "Title", - "element_id": "a10959d132f2b0d3723ae6b8b77f86b7", + "element_id": "3feb623147ddb3265b5968ce2efb8f6b", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 3, "links": [] }, - "text": "a ls" + "text": "Z te" }, { - "type": "NarrativeText", - "element_id": "a18dff87ecdbfa5d5d8a1ed56f7ce734", + "type": "Title", + "element_id": "a10959d132f2b0d3723ae6b8b77f86b7", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 3, "links": [] }, - "text": "A trip j can be covered after trip i by the same vehicle, if ts j" + "text": "a ls" }, { "type": "NarrativeText", - "element_id": "43dad32a26a446c5a2c74f3f2328b849", + "element_id": "a18dff87ecdbfa5d5d8a1ed56f7ce734", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 3, "links": [] }, - "text": ". 
If le i ls le i j , otherwise, the vehicle may require waiting at le i for the duration of ðts" + "text": "A trip j can be covered after trip i by the same vehicle, if ts j" }, { "type": "NarrativeText", @@ -706,26 +706,26 @@ "text": "A sufficient number of vehicles are provided to maintain the feasibility of an instance. For each instance size ðm; nÞ, Table 1 provides the average of the number of locations, the number of times, the number of vehicles, and the number of possible empty travels, over five instances. The number of locations includes m distinct locations for depots and the number of locations at which various trips start or end. The number of times includes the start and the end time of the planning horizon and the start/end times for the trips. The number of vehicles is the total number of vehicles from all the depots. The number of possible empty travels is the number of possible connections between trips that require a vehicle travelling empty between two consecutive trips in a schedule." }, { - "type": "Title", - "element_id": "252f10c83610ebca1a059c0bae8255eb", + "type": "NarrativeText", + "element_id": "928fa0dcad70f173bc989ee5715375c5", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 3, "links": [] }, - "text": "f" + "text": "The description of the file for each problem instance is presented in Table 2. The first line in the file provides the number of depots ðmÞ, the number of trips, ðnÞ, and the number of locations ðlÞ, in the problem instance. The next n lines present the information for n trips. Each line corresponds to a trip, i A 1; …; n g, and provides the start location, the start time, the end location, and the end time of trip i. 
The next l lines present the travel times between any two locations, i; jA 1; …; l" }, { - "type": "NarrativeText", - "element_id": "928fa0dcad70f173bc989ee5715375c5", + "type": "Title", + "element_id": "252f10c83610ebca1a059c0bae8255eb", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 3, "links": [] }, - "text": "The description of the file for each problem instance is presented in Table 2. The first line in the file provides the number of depots ðmÞ, the number of trips, ðnÞ, and the number of locations ðlÞ, in the problem instance. The next n lines present the information for n trips. Each line corresponds to a trip, i A 1; …; n g, and provides the start location, the start time, the end location, and the end time of trip i. The next l lines present the travel times between any two locations, i; jA 1; …; l" + "text": "f" }, { "type": "UncategorizedText", @@ -914,17 +914,6 @@ }, "text": "S. Kulkarni et al. / Data in Brief 22 (2019) 484–487" }, - { - "type": "UncategorizedText", - "element_id": "9b19f9ab816598a0809e4afd5d60800f", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 4, - "links": [] - }, - "text": "487" - }, { "type": "Title", "element_id": "6ad378122bcd6e47bbfc3a3d2c23984a", @@ -1324,5 +1313,16 @@ ] }, "text": "(1994) 41–52." 
+ }, + { + "type": "UncategorizedText", + "element_id": "9b19f9ab816598a0809e4afd5d60800f", + "metadata": { + "data_source": {}, + "filetype": "application/pdf", + "page_number": 4, + "links": [] + }, + "text": "487" } ] \ No newline at end of file diff --git a/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/s3/small-pdf-set/2023-Jan-economic-outlook.pdf.json b/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/s3/small-pdf-set/2023-Jan-economic-outlook.pdf.json index 364131c8a9..b45c0b74de 100644 --- a/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/s3/small-pdf-set/2023-Jan-economic-outlook.pdf.json +++ b/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/s3/small-pdf-set/2023-Jan-economic-outlook.pdf.json @@ -121,26 +121,26 @@ "text": "COVID-19 deepens China’s slowdown. Economic activity in China slowed in the fourth quarter amid multiple large COVID-19 outbreaks in Beijing and other densely populated localities. Renewed lockdowns accompanied the outbreaks until the relaxation of COVID-19 restrictions in November and December, which paved the way for a full reopening. Real estate investment continued to contract, and developer restructuring is proceeding slowly, amid the lingering property market crisis. Developers have yet to deliver on a large backlog of presold housing, and downward pressure is building on house prices (so far limited by home price floors). The authorities have responded with additional monetary and fiscal policy easing, new vaccination targets for the elderly, and steps to support the completion of unfinished real estate projects. However, consumer and business sentiment remained subdued in late 2022. China’s slowdown has reduced global trade growth and international commodity prices." 
}, { - "type": "Title", - "element_id": "0cce65035ca66e9be782c845ddd606e2", + "type": "NarrativeText", + "element_id": "c140ad5c30b6075c1a553eddacd8eca5", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 3, "links": [] }, - "text": "Figure 1. Twin Peaks? Headline and Core Inflation (Percent, year over year)" + "text": "Monetary policy starts to bite. Signs are apparent that monetary policy tightening is starting to cool demand and inflation, but the full impact is unlikely to be realized before 2024. Global headline inflation appears to have peaked in the third quarter of 2022 (Figure 1). Prices of fuel and nonfuel commodities have declined, lowering headline inflation, notably in the United States, the euro area, and Latin America. But underlying (core) inflation has not yet peaked in most economies and remains well above pre-pandemic levels. It has persisted amid second-round effects from earlier cost shocks and tight labor markets with robust wage growth as consumer demand has remained resilient. Medium-term inflation expectations generally remain anchored, but some gauges are up. These developments have caused central banks to raise rates faster than expected, especially in the United States and the euro area, and to signal that rates will stay elevated for longer. Core inflation is declining in some economies that have completed their tightening cycle—such as Brazil. Financial markets are displaying high sensitivity to inflation news, with equity markets rising following recent releases of lower inflation data in anticipation of interest rate cuts (Box 1), despite central banks’ communicating their resolve to tighten policy further. With the peak in US headline inflation and an acceleration in rate hikes by several non-US central banks, the dollar has weakened since September but remains significantly stronger than a year ago." 
}, { - "type": "NarrativeText", - "element_id": "c140ad5c30b6075c1a553eddacd8eca5", + "type": "Title", + "element_id": "0cce65035ca66e9be782c845ddd606e2", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 3, "links": [] }, - "text": "Monetary policy starts to bite. Signs are apparent that monetary policy tightening is starting to cool demand and inflation, but the full impact is unlikely to be realized before 2024. Global headline inflation appears to have peaked in the third quarter of 2022 (Figure 1). Prices of fuel and nonfuel commodities have declined, lowering headline inflation, notably in the United States, the euro area, and Latin America. But underlying (core) inflation has not yet peaked in most economies and remains well above pre-pandemic levels. It has persisted amid second-round effects from earlier cost shocks and tight labor markets with robust wage growth as consumer demand has remained resilient. Medium-term inflation expectations generally remain anchored, but some gauges are up. These developments have caused central banks to raise rates faster than expected, especially in the United States and the euro area, and to signal that rates will stay elevated for longer. Core inflation is declining in some economies that have completed their tightening cycle—such as Brazil. Financial markets are displaying high sensitivity to inflation news, with equity markets rising following recent releases of lower inflation data in anticipation of interest rate cuts (Box 1), despite central banks’ communicating their resolve to tighten policy further. With the peak in US headline inflation and an acceleration in rate hikes by several non-US central banks, the dollar has weakened since September but remains significantly stronger than a year ago." + "text": "Figure 1. Twin Peaks? 
Headline and Core Inflation (Percent, year over year)" }, { "type": "Title", @@ -175,17 +175,6 @@ }, "text": "Euro area" }, - { - "type": "ListItem", - "element_id": "63e35649dd179389ecc7251e1503489a", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 3, - "links": [] - }, - "text": "1. Headline Inflation" - }, { "type": "UncategorizedText", "element_id": "808caaef5b114d874a25b7fec21b5516", @@ -198,15 +187,15 @@ "text": "18 16 14 12 10 8 6 4 2 0 –2" }, { - "type": "Title", - "element_id": "babfe67b3ecc6b32db9adb9da08274bf", + "type": "ListItem", + "element_id": "63e35649dd179389ecc7251e1503489a", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 3, "links": [] }, - "text": "Jan. 22" + "text": "1. Headline Inflation" }, { "type": "Title", @@ -252,49 +241,38 @@ }, "text": "Jan. 20" }, - { - "type": "UncategorizedText", - "element_id": "c7c72889cb49cf43d9bd1f892db1be2c", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 3, - "links": [] - }, - "text": "Jan. 2019" - }, { "type": "Title", - "element_id": "646612b0a62b59fd13be769b4590a9ac", + "element_id": "babfe67b3ecc6b32db9adb9da08274bf", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 3, "links": [] }, - "text": "Jul. 19" + "text": "Jan. 22" }, { - "type": "Title", - "element_id": "82debf5a182b9b394ad3a9d584a870ef", + "type": "UncategorizedText", + "element_id": "c7c72889cb49cf43d9bd1f892db1be2c", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 3, "links": [] }, - "text": "Jul. 22" + "text": "Jan. 2019" }, { - "type": "ListItem", - "element_id": "b790ab5fcad28bbedb50b568b3adeca2", + "type": "Title", + "element_id": "646612b0a62b59fd13be769b4590a9ac", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 3, "links": [] }, - "text": "2. Core Inflation" + "text": "Jul. 
19" }, { "type": "UncategorizedText", @@ -308,37 +286,37 @@ "text": "16 14 12 10 8 6 4 2 0" }, { - "type": "UncategorizedText", - "element_id": "28a5aa3897d66de6c31caba99a4c337e", + "type": "ListItem", + "element_id": "b790ab5fcad28bbedb50b568b3adeca2", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 3, "links": [] }, - "text": "–2" + "text": "2. Core Inflation" }, { - "type": "Title", - "element_id": "646612b0a62b59fd13be769b4590a9ac", + "type": "UncategorizedText", + "element_id": "28a5aa3897d66de6c31caba99a4c337e", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 3, "links": [] }, - "text": "Jul. 19" + "text": "–2" }, { - "type": "Title", - "element_id": "82debf5a182b9b394ad3a9d584a870ef", + "type": "UncategorizedText", + "element_id": "c7c72889cb49cf43d9bd1f892db1be2c", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 3, "links": [] }, - "text": "Jul. 22" + "text": "Jan. 2019" }, { "type": "Title", @@ -363,48 +341,48 @@ "text": "Jul. 20" }, { - "type": "UncategorizedText", - "element_id": "c7c72889cb49cf43d9bd1f892db1be2c", + "type": "Title", + "element_id": "f4a93992a1b09b3fa6200542fd6fde5a", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 3, "links": [] }, - "text": "Jan. 2019" + "text": "Jan. 21" }, { "type": "Title", - "element_id": "f4a93992a1b09b3fa6200542fd6fde5a", + "element_id": "81db94f58819ee2fd6c05ddef2082ccc", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 3, "links": [] }, - "text": "Jan. 21" + "text": "Jul. 21" }, { "type": "Title", - "element_id": "81db94f58819ee2fd6c05ddef2082ccc", + "element_id": "babfe67b3ecc6b32db9adb9da08274bf", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 3, "links": [] }, - "text": "Jul. 21" + "text": "Jan. 
22" }, { "type": "Title", - "element_id": "babfe67b3ecc6b32db9adb9da08274bf", + "element_id": "646612b0a62b59fd13be769b4590a9ac", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 3, "links": [] }, - "text": "Jan. 22" + "text": "Jul. 19" }, { "type": "NarrativeText", @@ -430,14 +408,25 @@ }, { "type": "Title", - "element_id": "cc874418b59b7ecb37a2c938783fb5ce", + "element_id": "82debf5a182b9b394ad3a9d584a870ef", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 3, "links": [] }, - "text": "Nov. 22" + "text": "Jul. 22" + }, + { + "type": "Title", + "element_id": "82debf5a182b9b394ad3a9d584a870ef", + "metadata": { + "data_source": {}, + "filetype": "application/pdf", + "page_number": 3, + "links": [] + }, + "text": "Jul. 22" }, { "type": "Title", @@ -451,15 +440,15 @@ "text": "Nov. 22" }, { - "type": "ListItem", - "element_id": "e3b0c44298fc1c149afbf4c8996fb924", + "type": "Title", + "element_id": "cc874418b59b7ecb37a2c938783fb5ce", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 4, + "page_number": 3, "links": [] }, - "text": "" + "text": "Nov. 22" }, { "type": "NarrativeText", @@ -517,26 +506,26 @@ "text": "For advanced economies, growth is projected to decline sharply from 2.7 percent in 2022 to 1.2 percent in 2023 before rising to 1.4 percent in 2024, with a downward revision of 0.2 percentage point for 2024. About 90 percent of advanced economies are projected to see a decline in growth in 2023." }, { - "type": "NarrativeText", - "element_id": "e8461ba4bbf4110ce195ca03366ae1f0", + "type": "ListItem", + "element_id": "e3b0c44298fc1c149afbf4c8996fb924", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 4, "links": [] }, - "text": "In the United States, growth is projected to fall from 2.0 percent in 2022 to 1.4 percent in 2023 and 1.0 percent in 2024. 
With growth rebounding in the second half of 2024, growth in 2024 will be faster than in 2023 on a fourth-quarter-over-fourth-quarter basis, as in most advanced" + "text": "" }, { - "type": "ListItem", - "element_id": "e3b0c44298fc1c149afbf4c8996fb924", + "type": "NarrativeText", + "element_id": "e8461ba4bbf4110ce195ca03366ae1f0", "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 5, + "page_number": 4, "links": [] }, - "text": "" + "text": "In the United States, growth is projected to fall from 2.0 percent in 2022 to 1.4 percent in 2023 and 1.0 percent in 2024. With growth rebounding in the second half of 2024, growth in 2024 will be faster than in 2023 on a fourth-quarter-over-fourth-quarter basis, as in most advanced" }, { "type": "ListItem", @@ -621,7 +610,7 @@ "metadata": { "data_source": {}, "filetype": "application/pdf", - "page_number": 6, + "page_number": 5, "links": [] }, "text": "" @@ -648,6 +637,17 @@ }, "text": "Growth in the Middle East and Central Asia is projected to decline from 5.3 percent in 2022 to 3.2 percent in 2023, with a downward revision of 0.4 percentage point since October, mainly attributable to a steeper-than-expected growth slowdown in Saudi Arabia, from 8.7 percent in 2022 (which was stronger than expected by 1.1 percentage points) to 2.6 percent in 2023, with a negative revision of 1.1 percentage points. The downgrade for 2023 reflects mainly lower oil production in line with an agreement through OPEC+ (Organization of the Petroleum Exporting Countries, including Russia and other non-OPEC oil exporters), while non-oil growth is expected to remain robust. In sub-Saharan Africa, growth is projected to remain moderate at 3.8 percent in 2023 amid prolonged fallout from the COVID-19 pandemic, although with a modest upward revision since October, before picking up to 4.1 percent in 2024. 
The small upward revision for 2023 (0.1 percentage point) reflects Nigeria’s rising growth in 2023 due to measures to address insecurity issues in the oil sector. In South Africa, by contrast, after a COVID-19 reopening rebound in 2022, projected growth more than halves in 2023, to 1.2 percent, reflecting weaker external demand, power shortages, and structural constraints." }, + { + "type": "ListItem", + "element_id": "e3b0c44298fc1c149afbf4c8996fb924", + "metadata": { + "data_source": {}, + "filetype": "application/pdf", + "page_number": 6, + "links": [] + }, + "text": "" + }, { "type": "NarrativeText", "element_id": "8ffcfc8eb8488c2cca522b99de891877", @@ -1750,14 +1750,14 @@ }, { "type": "ListItem", - "element_id": "e3b0c44298fc1c149afbf4c8996fb924", + "element_id": "90a90e12a4c6b8b74d3c8d20a76f22dc", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 8, "links": [] }, - "text": "" + "text": "China’s recovery stalling: Amid still-low population immunity levels and insufficient hospital capacity, especially outside the major urban areas, significant health consequences could hamper the recovery. A deepening crisis in the real estate market remains a major source of vulnerability, with risks of widespread defaults by developers and resulting financial sector instability. Spillovers to the rest of the world would operate primarily through lower demand and potentially renewed supply chain problems." }, { "type": "ListItem", @@ -1772,14 +1772,14 @@ }, { "type": "ListItem", - "element_id": "90a90e12a4c6b8b74d3c8d20a76f22dc", + "element_id": "42ac57e394bf7c98d908745cefce0b80", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 8, "links": [] }, - "text": "China’s recovery stalling: Amid still-low population immunity levels and insufficient hospital capacity, especially outside the major urban areas, significant health consequences could hamper the recovery. 
A deepening crisis in the real estate market remains a major source of vulnerability, with risks of widespread defaults by developers and resulting financial sector instability. Spillovers to the rest of the world would operate primarily through lower demand and potentially renewed supply chain problems." + "text": "War in Ukraine escalating: An escalation of the war in Ukraine remains a major source of vulnerability, particularly for Europe and lower-income countries. Europe is facing lower-than- anticipated gas prices, having stored enough gas to make shortages unlikely this winter. However, refilling storage with much-diminished Russian flows will be challenging ahead of next winter, particularly if it is a very cold one and China’s energy demand picks up, causing price spikes. A possible increase in food prices from a failed extension of the Black Sea grain initiative would put further pressure on lower-income countries that are experiencing food insecurity and have limited budgetary room to cushion the impact on households and businesses. With elevated food and fuel prices, social unrest may increase." }, { "type": "ListItem", @@ -1794,25 +1794,25 @@ }, { "type": "ListItem", - "element_id": "42ac57e394bf7c98d908745cefce0b80", + "element_id": "2d14934d52ff357c52e9ae1c38f7390e", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 8, "links": [] }, - "text": "War in Ukraine escalating: An escalation of the war in Ukraine remains a major source of vulnerability, particularly for Europe and lower-income countries. Europe is facing lower-than- anticipated gas prices, having stored enough gas to make shortages unlikely this winter. However, refilling storage with much-diminished Russian flows will be challenging ahead of next winter, particularly if it is a very cold one and China’s energy demand picks up, causing price spikes. 
A possible increase in food prices from a failed extension of the Black Sea grain initiative would put further pressure on lower-income countries that are experiencing food insecurity and have limited budgetary room to cushion the impact on households and businesses. With elevated food and fuel prices, social unrest may increase." + "text": "Debt distress: Since October, sovereign spreads for emerging market and developing economies have modestly declined on the back of an easing in global financial conditions (Box 1) and dollar depreciation. About 15 percent of low-income countries are estimated to be in debt distress, with an additional 45 percent at high risk of debt distress and about 25 percent of emerging market economies also at high risk. The combination of high debt levels from the pandemic, lower growth, and higher borrowing costs exacerbates the vulnerability of these economies, especially those with significant near-term dollar financing needs. Inflation persisting: Persistent labor market tightness could translate into stronger-than-expected wage growth. Higher-than-expected oil, gas, and food prices from the war in Ukraine or from a faster rebound in China’s growth could again raise headline inflation and pass through into underlying inflation. Such developments could cause inflation expectations to de-anchor and require an even tighter monetary policy." }, { "type": "ListItem", - "element_id": "2d14934d52ff357c52e9ae1c38f7390e", + "element_id": "e3b0c44298fc1c149afbf4c8996fb924", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 8, "links": [] }, - "text": "Debt distress: Since October, sovereign spreads for emerging market and developing economies have modestly declined on the back of an easing in global financial conditions (Box 1) and dollar depreciation. 
About 15 percent of low-income countries are estimated to be in debt distress, with an additional 45 percent at high risk of debt distress and about 25 percent of emerging market economies also at high risk. The combination of high debt levels from the pandemic, lower growth, and higher borrowing costs exacerbates the vulnerability of these economies, especially those with significant near-term dollar financing needs. Inflation persisting: Persistent labor market tightness could translate into stronger-than-expected wage growth. Higher-than-expected oil, gas, and food prices from the war in Ukraine or from a faster rebound in China’s growth could again raise headline inflation and pass through into underlying inflation. Such developments could cause inflation expectations to de-anchor and require an even tighter monetary policy." + "text": "" }, { "type": "ListItem", @@ -2288,103 +2288,48 @@ "text": "Slowing aggregate demand and weaker-than-expected inflation prints in some major advanced economies have prompted investors’ anticipation of a further reduction in the pace of future policy rate hikes. Corporate earnings forecasts have been cut due to headwinds from slowing demand, and margins have contracted across most regions. In addition, survey-based probabilities of recession have been increasing, particularly in the United States and Europe. However, upside risks to the inflation outlook remain. Despite the recent moderation in headline inflation, core inflation remains stubbornly high across most regions, labor markets are still tight, energy prices remain pressured by Russia’s ongoing war in Ukraine, and supply chain disruptions may reappear. To keep these risks in check, financial conditions will likely need to tighten further. If not, central banks may need to increase policy rates even more in order to achieve their inflation objectives." 
}, { - "type": "UncategorizedText", - "element_id": "e7f6c011776e8db7cd330b54174fd76f", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 11, - "links": [] - }, - "text": "6" - }, - { - "type": "UncategorizedText", - "element_id": "ef2d127de37b942baad06145e54b0c61", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 11, - "links": [] - }, - "text": "5" - }, - { - "type": "UncategorizedText", - "element_id": "4b227777d4dd1fc61c6f884f48641d02", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 11, - "links": [] - }, - "text": "4" - }, - { - "type": "UncategorizedText", - "element_id": "4e07408562bedb8b60ce05c1decfe3ad", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 11, - "links": [] - }, - "text": "3" - }, - { - "type": "UncategorizedText", - "element_id": "d4735e3a265e16eee03f59718b9b5d03", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 11, - "links": [] - }, - "text": "2" - }, - { - "type": "UncategorizedText", - "element_id": "6b86b273ff34fce19d6b804eff5a3f57", + "type": "Title", + "element_id": "6ef230728534d871e5126e2a55e12b26", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 11, "links": [] }, - "text": "1" + "text": "Figure 1.2. Market-Implied Expectations of Policy Rates (Percent)" }, { "type": "Title", - "element_id": "6ef230728534d871e5126e2a55e12b26", + "element_id": "8730d3c2022abf1f9665e4ca1da43e4d", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 11, "links": [] }, - "text": "Figure 1.2. 
Market-Implied Expectations of Policy Rates (Percent)" + "text": "Latest" }, { "type": "Title", - "element_id": "8730d3c2022abf1f9665e4ca1da43e4d", + "element_id": "53d79cec96694df67ce3baff95d8a2e3", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 11, "links": [] }, - "text": "Latest" + "text": "October 2022 GFSR" }, { - "type": "Title", - "element_id": "53d79cec96694df67ce3baff95d8a2e3", + "type": "UncategorizedText", + "element_id": "e7f6c011776e8db7cd330b54174fd76f", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 11, "links": [] }, - "text": "October 2022 GFSR" + "text": "6" }, { "type": "ListItem", @@ -2616,5 +2561,60 @@ "links": [] }, "text": "WEO Update © 2023 • ISBN: 979-8-40023-224-4" + }, + { + "type": "UncategorizedText", + "element_id": "ef2d127de37b942baad06145e54b0c61", + "metadata": { + "data_source": {}, + "filetype": "application/pdf", + "page_number": 11, + "links": [] + }, + "text": "5" + }, + { + "type": "UncategorizedText", + "element_id": "4b227777d4dd1fc61c6f884f48641d02", + "metadata": { + "data_source": {}, + "filetype": "application/pdf", + "page_number": 11, + "links": [] + }, + "text": "4" + }, + { + "type": "UncategorizedText", + "element_id": "4e07408562bedb8b60ce05c1decfe3ad", + "metadata": { + "data_source": {}, + "filetype": "application/pdf", + "page_number": 11, + "links": [] + }, + "text": "3" + }, + { + "type": "UncategorizedText", + "element_id": "d4735e3a265e16eee03f59718b9b5d03", + "metadata": { + "data_source": {}, + "filetype": "application/pdf", + "page_number": 11, + "links": [] + }, + "text": "2" + }, + { + "type": "UncategorizedText", + "element_id": "6b86b273ff34fce19d6b804eff5a3f57", + "metadata": { + "data_source": {}, + "filetype": "application/pdf", + "page_number": 11, + "links": [] + }, + "text": "1" } ] \ No newline at end of file diff --git 
a/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/s3/small-pdf-set/Silent-Giant-(1).pdf.json b/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/s3/small-pdf-set/Silent-Giant-(1).pdf.json index 46113691ff..309e2edbd2 100644 --- a/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/s3/small-pdf-set/Silent-Giant-(1).pdf.json +++ b/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/s3/small-pdf-set/Silent-Giant-(1).pdf.json @@ -1166,59 +1166,59 @@ "text": "r e p s e i t i l" }, { - "type": "Title", - "element_id": "1fb2ec4fc8fc547c0de86ba79ba651e5", + "type": "UncategorizedText", + "element_id": "48449a14a4ff7d79bb7a1b6f3d488eba", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 8, "links": [] }, - "text": "a t a F" + "text": "80" }, { "type": "UncategorizedText", - "element_id": "48449a14a4ff7d79bb7a1b6f3d488eba", + "element_id": "39fa9ec190eee7b6f4dff1100d6343e1", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 8, "links": [] }, - "text": "80" + "text": "60" }, { "type": "UncategorizedText", - "element_id": "39fa9ec190eee7b6f4dff1100d6343e1", + "element_id": "ce3201efc2e495241a85e4fc84575f50", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 8, "links": [] }, - "text": "60" + "text": "71.9" }, { - "type": "UncategorizedText", - "element_id": "d59eced1ded07f84c145592f65bdf854", + "type": "Title", + "element_id": "1fb2ec4fc8fc547c0de86ba79ba651e5", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 8, "links": [] }, - "text": "40" + "text": "a t a F" }, { "type": "UncategorizedText", - "element_id": "ce3201efc2e495241a85e4fc84575f50", + "element_id": "d59eced1ded07f84c145592f65bdf854", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 8, "links": [] }, - "text": "71.9" + "text": "40" }, { "type": "UncategorizedText", @@ -1925,169 +1925,169 
@@ "text": "8" }, { - "type": "Title", - "element_id": "de7d1b721a1e0632b7cf04edf5032c8e", + "type": "NarrativeText", + "element_id": "3c6336f12bcbf4d1ca36bef92d77efea", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 10, "links": [] }, - "text": "i" + "text": "Nuclear energy can be relied upon to power the new mobility revolution taking place. Every day, we use almost 20 million barrels of oil to power our vehicles. By swapping to an electric or hydrogen-powered transport fleet – all powered by the atom – we are able to address one of the key challenges to a sustainable economy." }, { - "type": "Title", - "element_id": "5d7f49449ab22deac22d767b89549c55", + "type": "NarrativeText", + "element_id": "f3e39d107b4601c15dbb3d83ed7a7d9c", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 10, "links": [] }, - "text": "ii" + "text": "We cannot afford to wait – we need every part of the puzzle to contribute towards solving some of the greatest challenges faced by humankind in a very long time. The impacts of climate change will hit the poorest and most vulnerable first, and failing to act will have significant humanitarian consequences." }, { - "type": "Title", - "element_id": "f5557d4fcf727a981a3c315aca733eef", + "type": "NarrativeText", + "element_id": "d263fe9467aa7876c4d5009c3125176b", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 10, "links": [] }, - "text": "iii" + "text": "Nuclear power is the silent giant of today’s energy system – it runs quietly in the background, capable of delivering immense amounts of power, regardless of weather or season, allowing us to focus on everything else in life. It is a technology that is available now, and can be expanded quickly across the world to help us solve some of the most defining challenges we face. 
Nuclear energy holds the potential to herald a new, cleaner and truly sustainable world – enabling us to pass on a cleaner planet to our children." }, { "type": "Title", - "element_id": "0ab306823035661bb8dba21cc2535231", + "element_id": "69824d3b0e70ca6aaa0da1613b65fd91", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 10, "links": [] }, - "text": "iv" + "text": "References" }, { "type": "Title", - "element_id": "4c94485e0c21ae6c41ce1dfe7b6bface", + "element_id": "de7d1b721a1e0632b7cf04edf5032c8e", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 10, "links": [] }, - "text": "v" + "text": "i" }, { "type": "Title", - "element_id": "c0ff93ea8927a7366db0331e5fd9d19f", + "element_id": "5d7f49449ab22deac22d767b89549c55", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 10, "links": [] }, - "text": "vi" + "text": "ii" }, { "type": "Title", - "element_id": "c1d2906220d1eef1b17422b7132872a8", + "element_id": "f5557d4fcf727a981a3c315aca733eef", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 10, "links": [] }, - "text": "vii" + "text": "iii" }, { "type": "Title", - "element_id": "ed171375d0bf81eaa5512140c3a29b8f", + "element_id": "0ab306823035661bb8dba21cc2535231", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 10, "links": [] }, - "text": "ix" + "text": "iv" }, { "type": "Title", - "element_id": "2d711642b726b04401627ca9fbac32f5", + "element_id": "4c94485e0c21ae6c41ce1dfe7b6bface", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 10, "links": [] }, - "text": "x" + "text": "v" }, { - "type": "NarrativeText", - "element_id": "3c6336f12bcbf4d1ca36bef92d77efea", + "type": "Title", + "element_id": "c0ff93ea8927a7366db0331e5fd9d19f", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 10, "links": [] }, - "text": "Nuclear energy can be relied upon to power the new 
mobility revolution taking place. Every day, we use almost 20 million barrels of oil to power our vehicles. By swapping to an electric or hydrogen-powered transport fleet – all powered by the atom – we are able to address one of the key challenges to a sustainable economy." + "text": "vi" }, { - "type": "NarrativeText", - "element_id": "f3e39d107b4601c15dbb3d83ed7a7d9c", + "type": "Title", + "element_id": "c1d2906220d1eef1b17422b7132872a8", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 10, "links": [] }, - "text": "We cannot afford to wait – we need every part of the puzzle to contribute towards solving some of the greatest challenges faced by humankind in a very long time. The impacts of climate change will hit the poorest and most vulnerable first, and failing to act will have significant humanitarian consequences." + "text": "vii" }, { "type": "NarrativeText", - "element_id": "d263fe9467aa7876c4d5009c3125176b", + "element_id": "de72de35f0092bdd3107011f3be18dc0", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 10, "links": [] }, - "text": "Nuclear power is the silent giant of today’s energy system – it runs quietly in the background, capable of delivering immense amounts of power, regardless of weather or season, allowing us to focus on everything else in life. It is a technology that is available now, and can be expanded quickly across the world to help us solve some of the most defining challenges we face. Nuclear energy holds the potential to herald a new, cleaner and truly sustainable world – enabling us to pass on a cleaner planet to our children." + "text": "International Energy Agency (2018), World Energy Outlook 2018. 
Data accessed from https://www.iea.org/weo/ – Based on the New Policies Scenario, which incorporates existing energy policies as well as an assessment of the results likely to stem from the implementation of announced policy intentions – with visual modification by World Nuclear Association. International Energy Agency (n.d.), Statistics. Accessed from: https://www.iea.org/statistics/?country=WORLD&year=2016&category=Electricity&indicator=ElecGenByFuel&mode =chart&dataTable=ELECTRICITYANDHEAT – with visual modifications by World Nuclear Association. International Energy Agency (2019), Nuclear Power in a Clean Energy System. Accessed from: https://www.iea.org/ publications/nuclear/ Intergovernmental Panel on Climate Change (2018), Special Report on Global Warming of 1.5 °C. Accessed from: https://www.ipcc.ch/sr15/ International Energy Agency (2019), Nuclear Power in a Clean Energy System. Accessed from: https://www.iea.org/ publications/nuclear/ International Energy Agency & OECD Nuclear Energy Agency (2015), Projected Costs of generating Electricity – 2015 Edition. Accessed from: https://www.oecd-nea.org/ndd/pubs/2015/7057-proj-costs-electricity-2015.pdf International Atomic Energy Agency (2015), Technical challenges in the application and licensing of digital instrumentation and control systems in nuclear power plants. Accessed from: https://www-pub.iaea.org/MTCD/ Publications/PDF/P1695_web.pdf" }, { - "type": "Title", - "element_id": "69824d3b0e70ca6aaa0da1613b65fd91", + "type": "NarrativeText", + "element_id": "b6396ecd6f60e3dcca17c045c00846c1", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 10, "links": [] }, - "text": "References" + "text": "viii Paul-Scherrer Institute. 
Data for nuclear accidents modified to reflect UNSCEAR findings/recommendations (2012)" }, { - "type": "NarrativeText", - "element_id": "de72de35f0092bdd3107011f3be18dc0", + "type": "Title", + "element_id": "ed171375d0bf81eaa5512140c3a29b8f", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 10, "links": [] }, - "text": "International Energy Agency (2018), World Energy Outlook 2018. Data accessed from https://www.iea.org/weo/ – Based on the New Policies Scenario, which incorporates existing energy policies as well as an assessment of the results likely to stem from the implementation of announced policy intentions – with visual modification by World Nuclear Association. International Energy Agency (n.d.), Statistics. Accessed from: https://www.iea.org/statistics/?country=WORLD&year=2016&category=Electricity&indicator=ElecGenByFuel&mode =chart&dataTable=ELECTRICITYANDHEAT – with visual modifications by World Nuclear Association. International Energy Agency (2019), Nuclear Power in a Clean Energy System. Accessed from: https://www.iea.org/ publications/nuclear/ Intergovernmental Panel on Climate Change (2018), Special Report on Global Warming of 1.5 °C. Accessed from: https://www.ipcc.ch/sr15/ International Energy Agency (2019), Nuclear Power in a Clean Energy System. Accessed from: https://www.iea.org/ publications/nuclear/ International Energy Agency & OECD Nuclear Energy Agency (2015), Projected Costs of generating Electricity – 2015 Edition. Accessed from: https://www.oecd-nea.org/ndd/pubs/2015/7057-proj-costs-electricity-2015.pdf International Atomic Energy Agency (2015), Technical challenges in the application and licensing of digital instrumentation and control systems in nuclear power plants. 
Accessed from: https://www-pub.iaea.org/MTCD/ Publications/PDF/P1695_web.pdf" + "text": "ix" }, { - "type": "NarrativeText", - "element_id": "b6396ecd6f60e3dcca17c045c00846c1", + "type": "Title", + "element_id": "2d711642b726b04401627ca9fbac32f5", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 10, "links": [] }, - "text": "viii Paul-Scherrer Institute. Data for nuclear accidents modified to reflect UNSCEAR findings/recommendations (2012)" + "text": "x" }, { "type": "UncategorizedText", diff --git a/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/s3/small-pdf-set/recalibrating-risk-report.pdf.json b/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/s3/small-pdf-set/recalibrating-risk-report.pdf.json index 2902ab9513..7c290e8639 100644 --- a/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/s3/small-pdf-set/recalibrating-risk-report.pdf.json +++ b/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/s3/small-pdf-set/recalibrating-risk-report.pdf.json @@ -154,26 +154,26 @@ "text": "It is widely accepted that humans have skewed perceptions of risks, and the way we respond to them is shaped by these perceptions, rather than the actual threats posed. Approximately 1.35 millioni people die every year because of traffic accidents, in comparison with 257 aviation fatalities in 2019ii, yet more people are nervous about flying, fearing a rare deadly crash, than being in a fatal traffic accident. These numbers tell a powerful and well-established story: evaluations of risk are largely the result of emotions, rather than logic or facts. Although it is hard to recognize and accept that our perceptions may mislead us and curtail effective decision making, this is a well-established characteristic of humanity." 
}, { - "type": "Title", - "element_id": "d977fff4c69c437aa4a44a5c5f4bf02e", + "type": "NarrativeText", + "element_id": "45e9c81bf6ccdc498a6ac5640d786736", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 4, "links": [] }, - "text": "Rank Order Laypersons" + "text": "Nuclear energy and the risk of radiation is one of the most extreme cases in which perceived and actual risks have diverged. The fear of radiation, whilst pre- dating the Second World War, was firmly established by the debate on the potential impacts of low-dose radiation from the fallout from nuclear weapons testing in the early years of the Cold War. Radiation in many ways became linked with the mental imagery of nuclear war, playing an important role in increasing public concern about radiation and its health effects. There is a well-established discrepancy between fact-based risk assessments and public perception of different risks. This is very much the case with nuclear power, and this is clearly highlighted in Figure 1, with laypersons ranking nuclear power as the highest risk out of 30 activities and technologies, with experts ranking nuclear as 20th. In many ways, popular culture’s depiction of radiation has played a role in ensuring that this discrepancy has remained, be it Godzilla, The Incredible Hulk, or The Simpsons, which regularly plays on the notion of radiation from nuclear power plants causing three-eyed fish, something that has been firmly rejected as unscientific." }, { - "type": "NarrativeText", - "element_id": "45e9c81bf6ccdc498a6ac5640d786736", + "type": "Title", + "element_id": "d977fff4c69c437aa4a44a5c5f4bf02e", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 4, "links": [] }, - "text": "Nuclear energy and the risk of radiation is one of the most extreme cases in which perceived and actual risks have diverged. 
The fear of radiation, whilst pre- dating the Second World War, was firmly established by the debate on the potential impacts of low-dose radiation from the fallout from nuclear weapons testing in the early years of the Cold War. Radiation in many ways became linked with the mental imagery of nuclear war, playing an important role in increasing public concern about radiation and its health effects. There is a well-established discrepancy between fact-based risk assessments and public perception of different risks. This is very much the case with nuclear power, and this is clearly highlighted in Figure 1, with laypersons ranking nuclear power as the highest risk out of 30 activities and technologies, with experts ranking nuclear as 20th. In many ways, popular culture’s depiction of radiation has played a role in ensuring that this discrepancy has remained, be it Godzilla, The Incredible Hulk, or The Simpsons, which regularly plays on the notion of radiation from nuclear power plants causing three-eyed fish, something that has been firmly rejected as unscientific." 
+ "text": "Rank Order Laypersons" }, { "type": "Title", @@ -231,26 +231,26 @@ "text": "1" }, { - "type": "UncategorizedText", - "element_id": "d4735e3a265e16eee03f59718b9b5d03", + "type": "Title", + "element_id": "602d25f25cca4ebb709f8b48f54d99d9", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 4, "links": [] }, - "text": "2" + "text": "Motor vehicles" }, { - "type": "Title", - "element_id": "602d25f25cca4ebb709f8b48f54d99d9", + "type": "UncategorizedText", + "element_id": "d4735e3a265e16eee03f59718b9b5d03", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 4, "links": [] }, - "text": "Motor vehicles" + "text": "2" }, { "type": "UncategorizedText", @@ -264,26 +264,26 @@ "text": "4" }, { - "type": "Title", - "element_id": "f8e3740e358309bd0570d4f3ca141793", + "type": "UncategorizedText", + "element_id": "4e07408562bedb8b60ce05c1decfe3ad", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 4, "links": [] }, - "text": "Handguns" + "text": "3" }, { - "type": "UncategorizedText", - "element_id": "4e07408562bedb8b60ce05c1decfe3ad", + "type": "Title", + "element_id": "f8e3740e358309bd0570d4f3ca141793", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 4, "links": [] }, - "text": "3" + "text": "Handguns" }, { "type": "UncategorizedText", @@ -396,15 +396,15 @@ "text": "" }, { - "type": "Title", - "element_id": "2f3122790ccc9e095abe1b5ceedddf88", + "type": "UncategorizedText", + "element_id": "785f3ec7eb32f30b90cd0fcf3657d388", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 4, "links": [] }, - "text": "X-rays" + "text": "22" }, { "type": "UncategorizedText", @@ -418,15 +418,15 @@ "text": "7" }, { - "type": "UncategorizedText", - "element_id": "785f3ec7eb32f30b90cd0fcf3657d388", + "type": "Title", + "element_id": "2f3122790ccc9e095abe1b5ceedddf88", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 
4, "links": [] }, - "text": "22" + "text": "X-rays" }, { "type": "UncategorizedText", @@ -451,15 +451,15 @@ "text": "" }, { - "type": "UncategorizedText", - "element_id": "b7a56873cd771f2c446d369b649430b6", + "type": "Title", + "element_id": "ed3861e631428b9b77e2bdc0384d2cbe", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 4, "links": [] }, - "text": "25" + "text": "Vaccinations" }, { "type": "UncategorizedText", @@ -473,15 +473,15 @@ "text": "30" }, { - "type": "Title", - "element_id": "ed3861e631428b9b77e2bdc0384d2cbe", + "type": "UncategorizedText", + "element_id": "b7a56873cd771f2c446d369b649430b6", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 4, "links": [] }, - "text": "Vaccinations" + "text": "25" }, { "type": "Title", @@ -528,213 +528,213 @@ "text": "1 The original study was published in 1978, but its findings have been confirmed by numerous studies since." }, { - "type": "NarrativeText", - "element_id": "e11247712b3df61756970b45f019ad68", + "type": "Title", + "element_id": "d6acb6d51cfc574936fc79bc06b8a371", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 5, "links": [] }, - "text": "r a e y" + "text": "Natural" }, { "type": "Title", - "element_id": "3f79bb7b435b05321651daefd374cdc6", + "element_id": "8c3274ea479fd4a25c0b5611a8e48662", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 5, "links": [] }, - "text": "e" + "text": "Artificial" }, { - "type": "Title", - "element_id": "f83714d89302473e0e4f5399bd50e7a9", + "type": "UncategorizedText", + "element_id": "d4a293a7987bc37f4a826e0da1961aab", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 5, "links": [] }, - "text": "W T" + "text": " 48% Radon  14% Buildings & soil  12% Food & water  10% Cosmic  4% Thoron" }, { - "type": "NarrativeText", - "element_id": "f9bb49945b60897227abdd75b5f8d39b", + "type": "UncategorizedText", + "element_id": 
"0f748653e413fbddbb18262352d56b23", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 5, "links": [] }, - "text": "r e p s e i t i l" + "text": " 11% Medicine  0.4%  0.4% Miscellaneous  0.2% Occupational  0.04% Nuclear discharges" }, { "type": "Title", - "element_id": "1fb2ec4fc8fc547c0de86ba79ba651e5", + "element_id": "039bede24e51e7c42ce352c25b6427c0", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 5, "links": [] }, - "text": "a t a F" + "text": "Fallout" }, { "type": "Title", - "element_id": "d6acb6d51cfc574936fc79bc06b8a371", + "element_id": "9f3d0ae9a00bcefb94ac8bd0cd5a5da3", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 5, "links": [] }, - "text": "Natural" + "text": "Figure 2. Global average exposure from different sources of radiation" }, { - "type": "Title", - "element_id": "8c3274ea479fd4a25c0b5611a8e48662", + "type": "NarrativeText", + "element_id": "f170516281e47bab0dcbdcc3f7834e25", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 5, "links": [] }, - "text": "Artificial" + "text": "Fossil fuels – currently accounting for around 81% of total energy supplyiv – cause significant levels of emissions in terms of both greenhouse gases and air pollutants. Despite the serious and ongoing health and environmental harms caused by air pollution, it is often considered to be an inevitable consequence of economic development. Air pollution’s contribution to the burden of disease is profound, with an estimated 8.7 million people dying worldwide prematurely in 2018 alonev,vi. Despite this, it fails to induce the same fears and anxieties in people as nuclear energy does." 
}, { - "type": "UncategorizedText", - "element_id": "d4a293a7987bc37f4a826e0da1961aab", + "type": "NarrativeText", + "element_id": "f2f020a2d66ed9c32afcc917fe19bde8", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 5, "links": [] }, - "text": " 48% Radon  14% Buildings & soil  12% Food & water  10% Cosmic  4% Thoron" + "text": "In terms of accidents, hydropower is the deadliest electricity generator, mostly due to collapsing dams and the consequences of flooding. The Banqiao Dam failure in 1975 led to at least 26,000 people drowning, and as many as 150,000 deaths resulting from the secondary effects of the accident. In comparison, radiation exposure following Chernobyl caused 54 deaths2, while no casualties due to radiation are likely to occur from the accident at Fukushima Daiichi." }, { "type": "UncategorizedText", - "element_id": "0f748653e413fbddbb18262352d56b23", + "element_id": "b7a56873cd771f2c446d369b649430b6", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 5, "links": [] }, - "text": " 11% Medicine  0.4%  0.4% Miscellaneous  0.2% Occupational  0.04% Nuclear discharges" + "text": "25" }, { - "type": "Title", - "element_id": "039bede24e51e7c42ce352c25b6427c0", + "type": "UncategorizedText", + "element_id": "6a3adc54db5128f797d4a12855193373", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 5, "links": [] }, - "text": "Fallout" + "text": "24.6" }, { - "type": "Title", - "element_id": "9f3d0ae9a00bcefb94ac8bd0cd5a5da3", + "type": "UncategorizedText", + "element_id": "f5ca38f748a1d6eaf726b8a42fb575c3", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 5, "links": [] }, - "text": "Figure 2. 
Global average exposure from different sources of radiation" + "text": "20" }, { - "type": "NarrativeText", - "element_id": "f170516281e47bab0dcbdcc3f7834e25", + "type": "UncategorizedText", + "element_id": "dfb6b8c404e0fa2b32def4ba49e00b3c", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 5, "links": [] }, - "text": "Fossil fuels – currently accounting for around 81% of total energy supplyiv – cause significant levels of emissions in terms of both greenhouse gases and air pollutants. Despite the serious and ongoing health and environmental harms caused by air pollution, it is often considered to be an inevitable consequence of economic development. Air pollution’s contribution to the burden of disease is profound, with an estimated 8.7 million people dying worldwide prematurely in 2018 alonev,vi. Despite this, it fails to induce the same fears and anxieties in people as nuclear energy does." + "text": "18.4" }, { "type": "NarrativeText", - "element_id": "f2f020a2d66ed9c32afcc917fe19bde8", + "element_id": "e11247712b3df61756970b45f019ad68", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 5, "links": [] }, - "text": "In terms of accidents, hydropower is the deadliest electricity generator, mostly due to collapsing dams and the consequences of flooding. The Banqiao Dam failure in 1975 led to at least 26,000 people drowning, and as many as 150,000 deaths resulting from the secondary effects of the accident. In comparison, radiation exposure following Chernobyl caused 54 deaths2, while no casualties due to radiation are likely to occur from the accident at Fukushima Daiichi." 
+ "text": "r a e y" }, { - "type": "UncategorizedText", - "element_id": "b7a56873cd771f2c446d369b649430b6", + "type": "Title", + "element_id": "3f79bb7b435b05321651daefd374cdc6", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 5, "links": [] }, - "text": "25" + "text": "e" }, { "type": "UncategorizedText", - "element_id": "6a3adc54db5128f797d4a12855193373", + "element_id": "e629fa6598d732768f7c726b4b621285", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 5, "links": [] }, - "text": "24.6" + "text": "15" }, { - "type": "UncategorizedText", - "element_id": "f5ca38f748a1d6eaf726b8a42fb575c3", + "type": "Title", + "element_id": "f83714d89302473e0e4f5399bd50e7a9", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 5, "links": [] }, - "text": "20" + "text": "W T" }, { - "type": "UncategorizedText", - "element_id": "dfb6b8c404e0fa2b32def4ba49e00b3c", + "type": "NarrativeText", + "element_id": "f9bb49945b60897227abdd75b5f8d39b", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 5, "links": [] }, - "text": "18.4" + "text": "r e p s e i t i l" }, { "type": "UncategorizedText", - "element_id": "e629fa6598d732768f7c726b4b621285", + "element_id": "4a44dc15364204a80fe80e9039455cc1", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 5, "links": [] }, - "text": "15" + "text": "10" }, { - "type": "UncategorizedText", - "element_id": "4a44dc15364204a80fe80e9039455cc1", + "type": "Title", + "element_id": "1fb2ec4fc8fc547c0de86ba79ba651e5", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 5, "links": [] }, - "text": "10" + "text": "a t a F" }, { "type": "UncategorizedText", @@ -1277,135 +1277,135 @@ }, { "type": "Title", - "element_id": "de7d1b721a1e0632b7cf04edf5032c8e", + "element_id": "69824d3b0e70ca6aaa0da1613b65fd91", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 10, 
"links": [] }, - "text": "i" + "text": "References" }, { "type": "Title", - "element_id": "5d7f49449ab22deac22d767b89549c55", + "element_id": "de7d1b721a1e0632b7cf04edf5032c8e", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 10, "links": [] }, - "text": "ii" + "text": "i" }, { - "type": "Title", - "element_id": "f5557d4fcf727a981a3c315aca733eef", + "type": "NarrativeText", + "element_id": "e72fdf383c0b4d8cba0284d4f7ff06d5", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 10, "links": [] }, - "text": "iii" + "text": "World Health Organization (2020). Road traffic injuries. Available at: https://www.who.int/news-room/fact-sheets/ detail/road-traffic-injuries" }, { "type": "Title", - "element_id": "4c94485e0c21ae6c41ce1dfe7b6bface", + "element_id": "5d7f49449ab22deac22d767b89549c55", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 10, "links": [] }, - "text": "v" + "text": "ii" }, { - "type": "Title", - "element_id": "c0ff93ea8927a7366db0331e5fd9d19f", + "type": "NarrativeText", + "element_id": "e4d7c811a799c3c8e706125556f8a370", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 10, "links": [] }, - "text": "vi" + "text": "BBC (2020). Plane crash fatalities fell more than 50% in 2019. Available at: https://www.bbc.co.uk/news/ business-50953712" }, { "type": "Title", - "element_id": "69824d3b0e70ca6aaa0da1613b65fd91", + "element_id": "f5557d4fcf727a981a3c315aca733eef", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 10, "links": [] }, - "text": "References" + "text": "iii" }, { "type": "NarrativeText", - "element_id": "e72fdf383c0b4d8cba0284d4f7ff06d5", + "element_id": "2ef1e8614bc32af635d2a0c894b2ed3c", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 10, "links": [] }, - "text": "World Health Organization (2020). Road traffic injuries. 
Available at: https://www.who.int/news-room/fact-sheets/ detail/road-traffic-injuries" + "text": "Slovic, P., 2010. The Psychology of risk. Saúde e Sociedade, 19(4), pp. 731-747." }, { "type": "NarrativeText", - "element_id": "e4d7c811a799c3c8e706125556f8a370", + "element_id": "4051afedda98549176dc28aaa9087e81", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 10, "links": [] }, - "text": "BBC (2020). Plane crash fatalities fell more than 50% in 2019. Available at: https://www.bbc.co.uk/news/ business-50953712" + "text": "iv United Nations Scientific Committee on the Effects of Radiation (2016). Report of the United Nations Scientific" }, { "type": "NarrativeText", - "element_id": "2ef1e8614bc32af635d2a0c894b2ed3c", + "element_id": "98e5f594de0e79990a0650489fdf295c", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 10, "links": [] }, - "text": "Slovic, P., 2010. The Psychology of risk. Saúde e Sociedade, 19(4), pp. 731-747." + "text": "Committee on the Effects of Atomic Radiation. Accessed from: https://www.unscear.org/docs/publications/2016/ UNSCEAR_2016_GA-Report-CORR.pdf" }, { - "type": "NarrativeText", - "element_id": "4051afedda98549176dc28aaa9087e81", + "type": "Title", + "element_id": "4c94485e0c21ae6c41ce1dfe7b6bface", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 10, "links": [] }, - "text": "iv United Nations Scientific Committee on the Effects of Radiation (2016). Report of the United Nations Scientific" + "text": "v" }, { "type": "NarrativeText", - "element_id": "98e5f594de0e79990a0650489fdf295c", + "element_id": "c328c06c32c00c43471cd3c9d257c68b", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 10, "links": [] }, - "text": "Committee on the Effects of Atomic Radiation. Accessed from: https://www.unscear.org/docs/publications/2016/ UNSCEAR_2016_GA-Report-CORR.pdf" + "text": "International Energy Agency (2020). 
Global share of total energy supply by source, 2018. Key World Energy Statistics 2020. Available at: https://www.iea.org/data-and-statistics/charts/global-share-of-total-energy-supply-by- source-2018" }, { - "type": "NarrativeText", - "element_id": "c328c06c32c00c43471cd3c9d257c68b", + "type": "Title", + "element_id": "c0ff93ea8927a7366db0331e5fd9d19f", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 10, "links": [] }, - "text": "International Energy Agency (2020). Global share of total energy supply by source, 2018. Key World Energy Statistics 2020. Available at: https://www.iea.org/data-and-statistics/charts/global-share-of-total-energy-supply-by- source-2018" + "text": "vi" }, { "type": "NarrativeText", diff --git a/test_unstructured_ingest/expected-structured-output/s3/small-pdf-set/2023-Jan-economic-outlook.pdf.json b/test_unstructured_ingest/expected-structured-output/s3/small-pdf-set/2023-Jan-economic-outlook.pdf.json index 94a28f07c6..11b0573a02 100644 --- a/test_unstructured_ingest/expected-structured-output/s3/small-pdf-set/2023-Jan-economic-outlook.pdf.json +++ b/test_unstructured_ingest/expected-structured-output/s3/small-pdf-set/2023-Jan-economic-outlook.pdf.json @@ -289,7 +289,7 @@ }, { "type": "Title", - "element_id": "49dca65f362fee401292ed7ada96f962", + "element_id": "323d79e74460eda1fb0f8d55a2e0ff42", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -303,11 +303,11 @@ "filetype": "application/pdf", "page_number": 3 }, - "text": "United States" + "text": "Median country Brazil" }, { "type": "Title", - "element_id": "323d79e74460eda1fb0f8d55a2e0ff42", + "element_id": "49dca65f362fee401292ed7ada96f962", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -321,7 +321,7 @@ "filetype": "application/pdf", "page_number": 3 }, - "text": "Median country Brazil" + "text": "United States" }, { 
"type": "Title", @@ -793,7 +793,7 @@ }, { "type": "ListItem", - "element_id": "e3b0c44298fc1c149afbf4c8996fb924", + "element_id": "afde979c99a73646915fe253c85c5a9c", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -807,11 +807,11 @@ "filetype": "application/pdf", "page_number": 5 }, - "text": "" + "text": "Growth in emerging and developing Europe is projected to have bottomed out in 2022 at 0.7 percent and, since the October forecast, has been revised up for 2023 by 0.9 percentage point to 1.5 percent. This reflects a smaller economic contraction in Russia in 2022 (estimated at –2.2 percent compared with a predicted –3.4 percent) followed by modestly positive growth in 2023. At the current oil price cap level of the Group of Seven, Russian crude oil export volumes are not expected to be significantly affected, with Russian trade continuing to be redirected from sanctioning to non-sanctioning countries. In Latin America and the Caribbean, growth is projected to decline from 3.9 percent in 2022 to 1.8 percent in 2023, with an upward revision for 2023 of 0.1 percentage point since October. The forecast revision reflects upgrades of 0.2 percentage point for Brazil and 0.5 percentage point for Mexico due to unexpected domestic demand resilience, higher-than-expected growth in" }, { "type": "ListItem", - "element_id": "afde979c99a73646915fe253c85c5a9c", + "element_id": "e3b0c44298fc1c149afbf4c8996fb924", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -825,7 +825,7 @@ "filetype": "application/pdf", "page_number": 5 }, - "text": "Growth in emerging and developing Europe is projected to have bottomed out in 2022 at 0.7 percent and, since the October forecast, has been revised up for 2023 by 0.9 percentage point to 1.5 percent. 
This reflects a smaller economic contraction in Russia in 2022 (estimated at –2.2 percent compared with a predicted –3.4 percent) followed by modestly positive growth in 2023. At the current oil price cap level of the Group of Seven, Russian crude oil export volumes are not expected to be significantly affected, with Russian trade continuing to be redirected from sanctioning to non-sanctioning countries. In Latin America and the Caribbean, growth is projected to decline from 3.9 percent in 2022 to 1.8 percent in 2023, with an upward revision for 2023 of 0.1 percentage point since October. The forecast revision reflects upgrades of 0.2 percentage point for Brazil and 0.5 percentage point for Mexico due to unexpected domestic demand resilience, higher-than-expected growth in" + "text": "" }, { "type": "ListItem", @@ -1098,8 +1098,8 @@ "text": "Q4 over Q4 2/" }, { - "type": "Title", - "element_id": "1968c7f7ac8a3b0483f733357bb50b16", + "type": "Table", + "element_id": "af79981b9ad6dea2ab3fa92cb5954958", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1113,11 +1113,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "WEO Projections 1/" + "text": "over Estimate___ Projections WEO Projections 1/ Estimate Projections 2021 2022 2023 2024 2023 2024 2022 2023 2024 World Output 6.2 34 29 34 0.2 0.1 1.9 3.2 3.0 Advanced Economies 5.4 27 1.2 14 04 0.2 1.3 14 1.6 United States 5.9 2.0 14 1.0 04 -0.2 07 1.0 13 Euro Area 5.3 3.5 07 16 0.2 -0.2 19 0.5 24 Germany 26 19 01 14 04 0.1 14 0.0 23 France 68 26 07 16 0.0 0.0 0.5 09 18 Italy 67 3.9 06 0.9 08 -04 21 0.1 1.0 Spain 5.5 5.2 14 24 -0.1 -0.2 21 13 28 Japan 21 14 18 0.9 0.2 -04 17 1.0 1.0 United Kingdom 76 41 -06 0.9 -0.9 03 04 -05 18 Canada 5.0 3.5 15 15 0.0 0.1 23 12 1.9 Other Advanced Economies 3/ 5.3 28 20 24 -03 02 14 2a 2.2 Emerging Market and Developing Economies 67 3.9 40 42 0.3 -0.1 25 5.0 4A Emerging and Developing Asia 74 43 5.3 5.2 04 
0.0 3.4 6.2 49 China 84 3.0 5.2 45 08 0.0 29 5.9 41 India 4/ 87 68 61 68 0.0 0.0 43 70 7A Emerging and Developing Europe 69 07 15 26 0.9 01 -2.0 3.5 28 Russia 47 -2.2 0.3 21 26 06 441 1.0 2.0 Latin America and the Caribbean 7.0 3.9 18 2a 04 0.3 26 1.9 19 Brazil 5.0 34 12 15 0.2 -04 28 0.8 22 Mexico 47 34 47 16 05 -0.2 37 14 1.9 Middle East and Central Asia 45 5.3 3.2 37 -04 0.2 . . . Saudi Arabia 3.2 87 26 34 -11 0.5 46 27 35 Sub-Saharan Africa 47 38 38 41 04 0.0 = ao ao Nigeria 3.6 3.0 3.2 29 0.2 0.0 26 31 29 South Africa 49 26 12 13 01 0.0 3.0 0.5 18 Memorandum World Growth Based on Market Exchange Rates 6.0 3.41 24 25 03 -0.1 17 25 25 European Union 5.5 37 07 18 0.0 -0.3 18 1.2 2.0 ASEAN-5 5/ 3.8 5.2 43 47 0.2 -0.2 37 57 40 Middle East and North Africa 41 54 3.2 35 -04 0.2 a . . Emerging Market and Middle-Income Economies 70 38 40 44 04 0.0 25 5.0 44 Low-Income Developing Countries 441 49 49 56 0.0 01 World Trade Volume (goods and services) 6/ 10.4 5.4 24 3.4 -01 -0.3 Advanced Economies 94 66 23 27 0.0 -04 Emerging Market and Developing Economies 124 34 26 46 03 0.0 Commodity Prices 7/ 65.8 39.8 -16.2 71 33 -0.9 11.2 -98 59 Nonfuel (average based on world commodity import weights) 26.4 70 -6.3 -0.4 -01 03 -2.0 14 -0.2" }, { "type": "Title", - "element_id": "b88d850d87e55cb1fd14ae67e5644d57", + "element_id": "1968c7f7ac8a3b0483f733357bb50b16", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1131,11 +1131,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "Estimate 2022" + "text": "WEO Projections 1/" }, { "type": "Title", - "element_id": "18665f77847d326417463628d8860261", + "element_id": "b88d850d87e55cb1fd14ae67e5644d57", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1149,7 +1149,7 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "Projections 2023" + "text": "Estimate 2022" }, { "type": 
"Title", @@ -1188,8 +1188,8 @@ "text": "Projections 2023" }, { - "type": "UncategorizedText", - "element_id": "6557739a67283a8de383fc5c0997fbec", + "type": "Title", + "element_id": "18665f77847d326417463628d8860261", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1203,11 +1203,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "2024" + "text": "Projections 2023" }, { "type": "UncategorizedText", - "element_id": "d398b29d3dbbb9bf201d4c7e1c19ff9d", + "element_id": "6557739a67283a8de383fc5c0997fbec", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1221,11 +1221,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "2023" + "text": "2024" }, { "type": "UncategorizedText", - "element_id": "1bea20e1df19b12013976de2b5e0e3d1", + "element_id": "6557739a67283a8de383fc5c0997fbec", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1239,7 +1239,7 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "2021" + "text": "2024" }, { "type": "UncategorizedText", @@ -1261,7 +1261,7 @@ }, { "type": "UncategorizedText", - "element_id": "6557739a67283a8de383fc5c0997fbec", + "element_id": "1bea20e1df19b12013976de2b5e0e3d1", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1275,11 +1275,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "2024" + "text": "2021" }, { - "type": "Table", - "element_id": "af79981b9ad6dea2ab3fa92cb5954958", + "type": "UncategorizedText", + "element_id": "d398b29d3dbbb9bf201d4c7e1c19ff9d", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1293,11 +1293,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "over Estimate___ Projections WEO Projections 1/ Estimate 
Projections 2021 2022 2023 2024 2023 2024 2022 2023 2024 World Output 6.2 34 29 34 0.2 0.1 1.9 3.2 3.0 Advanced Economies 5.4 27 1.2 14 04 0.2 1.3 14 1.6 United States 5.9 2.0 14 1.0 04 -0.2 07 1.0 13 Euro Area 5.3 3.5 07 16 0.2 -0.2 19 0.5 24 Germany 26 19 01 14 04 0.1 14 0.0 23 France 68 26 07 16 0.0 0.0 0.5 09 18 Italy 67 3.9 06 0.9 08 -04 21 0.1 1.0 Spain 5.5 5.2 14 24 -0.1 -0.2 21 13 28 Japan 21 14 18 0.9 0.2 -04 17 1.0 1.0 United Kingdom 76 41 -06 0.9 -0.9 03 04 -05 18 Canada 5.0 3.5 15 15 0.0 0.1 23 12 1.9 Other Advanced Economies 3/ 5.3 28 20 24 -03 02 14 2a 2.2 Emerging Market and Developing Economies 67 3.9 40 42 0.3 -0.1 25 5.0 4A Emerging and Developing Asia 74 43 5.3 5.2 04 0.0 3.4 6.2 49 China 84 3.0 5.2 45 08 0.0 29 5.9 41 India 4/ 87 68 61 68 0.0 0.0 43 70 7A Emerging and Developing Europe 69 07 15 26 0.9 01 -2.0 3.5 28 Russia 47 -2.2 0.3 21 26 06 441 1.0 2.0 Latin America and the Caribbean 7.0 3.9 18 2a 04 0.3 26 1.9 19 Brazil 5.0 34 12 15 0.2 -04 28 0.8 22 Mexico 47 34 47 16 05 -0.2 37 14 1.9 Middle East and Central Asia 45 5.3 3.2 37 -04 0.2 . . . Saudi Arabia 3.2 87 26 34 -11 0.5 46 27 35 Sub-Saharan Africa 47 38 38 41 04 0.0 = ao ao Nigeria 3.6 3.0 3.2 29 0.2 0.0 26 31 29 South Africa 49 26 12 13 01 0.0 3.0 0.5 18 Memorandum World Growth Based on Market Exchange Rates 6.0 3.41 24 25 03 -0.1 17 25 25 European Union 5.5 37 07 18 0.0 -0.3 18 1.2 2.0 ASEAN-5 5/ 3.8 5.2 43 47 0.2 -0.2 37 57 40 Middle East and North Africa 41 54 3.2 35 -04 0.2 a . . 
Emerging Market and Middle-Income Economies 70 38 40 44 04 0.0 25 5.0 44 Low-Income Developing Countries 441 49 49 56 0.0 01 World Trade Volume (goods and services) 6/ 10.4 5.4 24 3.4 -01 -0.3 Advanced Economies 94 66 23 27 0.0 -04 Emerging Market and Developing Economies 124 34 26 46 03 0.0 Commodity Prices 7/ 65.8 39.8 -16.2 71 33 -0.9 11.2 -98 59 Nonfuel (average based on world commodity import weights) 26.4 70 -6.3 -0.4 -01 03 -2.0 14 -0.2" + "text": "2023" }, { "type": "UncategorizedText", - "element_id": "72d73db944cf6d9a5f11d6c073c1dce0", + "element_id": "69dfc187e2e6d907a0546f7e76f8ee3f", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1311,11 +1311,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "3.4" + "text": "6.2" }, { "type": "UncategorizedText", - "element_id": "44896b09365746b5f7167ee4d64988a3", + "element_id": "4e6611d25d5013d40f58a6f82e3aecdf", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1329,11 +1329,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "0.2" + "text": "–0.1" }, { "type": "UncategorizedText", - "element_id": "f491e65f8d4b8dbec7621fcedaf1b7a4", + "element_id": "35efc6ded4e13f29a8d86e4f33294be0", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1347,11 +1347,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "2.9" + "text": "3.1" }, { "type": "UncategorizedText", - "element_id": "35efc6ded4e13f29a8d86e4f33294be0", + "element_id": "a416ea84421fa7e1351582da48235bac", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1365,11 +1365,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "3.1" + "text": "3.0" }, { "type": "UncategorizedText", - "element_id": "4e6611d25d5013d40f58a6f82e3aecdf", + 
"element_id": "72d73db944cf6d9a5f11d6c073c1dce0", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1383,11 +1383,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "–0.1" + "text": "3.4" }, { "type": "UncategorizedText", - "element_id": "69dfc187e2e6d907a0546f7e76f8ee3f", + "element_id": "3135d2d71bff77be4838a7102bbac5b8", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1401,11 +1401,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "6.2" + "text": "3.2" }, { - "type": "Title", - "element_id": "fcadc00fe663ee0e7818b0ffc5c46948", + "type": "UncategorizedText", + "element_id": "f491e65f8d4b8dbec7621fcedaf1b7a4", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1419,11 +1419,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "World Output" + "text": "2.9" }, { - "type": "UncategorizedText", - "element_id": "a416ea84421fa7e1351582da48235bac", + "type": "Title", + "element_id": "fcadc00fe663ee0e7818b0ffc5c46948", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1437,11 +1437,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "3.0" + "text": "World Output" }, { "type": "UncategorizedText", - "element_id": "3135d2d71bff77be4838a7102bbac5b8", + "element_id": "44896b09365746b5f7167ee4d64988a3", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1455,7 +1455,7 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "3.2" + "text": "0.2" }, { "type": "UncategorizedText", @@ -1476,8 +1476,8 @@ "text": "1.9" }, { - "type": "Title", - "element_id": "6185fd66a4e106814e65c047c15dfb1f", + "type": "UncategorizedText", + "element_id": "eae9d4d60a1fe2df23f7b65ae3d76ca8", 
"metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1491,11 +1491,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "Advanced Economies United States Euro Area" + "text": "1.3 0.7 1.9 1.4 0.5 2.1 2.1 1.7 0.4 2.3 1.4" }, { "type": "UncategorizedText", - "element_id": "2a9680555d457b6da4b6748492bb6f3d", + "element_id": "1a009e8c6bb6dada03c326655a15bedf", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1509,11 +1509,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "5.4 5.9 5.3 2.6 6.8 6.7 5.5 2.1 7.6 5.0 5.3" + "text": "1.1 1.0 0.5 0.0 0.9 0.1 1.3 1.0 –0.5 1.2 2.1" }, { "type": "UncategorizedText", - "element_id": "1776cf91dccdf2cce268fcee416b28f6", + "element_id": "6976f35f9f91b539b46743f37d94014a", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1527,11 +1527,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "1.6 1.3 2.1 2.3 1.8 1.0 2.8 1.0 1.8 1.9 2.2" + "text": "2.7 2.0 3.5 1.9 2.6 3.9 5.2 1.4 4.1 3.5 2.8" }, { "type": "UncategorizedText", - "element_id": "777e0063772d428bf1c04383b8ad058e", + "element_id": "2a9680555d457b6da4b6748492bb6f3d", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1545,11 +1545,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "1.4 1.0 1.6 1.4 1.6 0.9 2.4 0.9 0.9 1.5 2.4" + "text": "5.4 5.9 5.3 2.6 6.8 6.7 5.5 2.1 7.6 5.0 5.3" }, { "type": "UncategorizedText", - "element_id": "1a009e8c6bb6dada03c326655a15bedf", + "element_id": "f22875edf393e3502ad60c82e81c5933", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1563,11 +1563,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "1.1 1.0 0.5 0.0 0.9 0.1 1.3 1.0 –0.5 1.2 2.1" + 
"text": "0.1 0.4 0.2 0.4 0.0 0.8 –0.1 0.2 –0.9 0.0 –0.3" }, { "type": "UncategorizedText", - "element_id": "eae9d4d60a1fe2df23f7b65ae3d76ca8", + "element_id": "1776cf91dccdf2cce268fcee416b28f6", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1581,11 +1581,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "1.3 0.7 1.9 1.4 0.5 2.1 2.1 1.7 0.4 2.3 1.4" + "text": "1.6 1.3 2.1 2.3 1.8 1.0 2.8 1.0 1.8 1.9 2.2" }, { "type": "UncategorizedText", - "element_id": "d8236eb6a9bab4f3d37735048ab5aeee", + "element_id": "2f6f72296f8ab115fda4292808436b88", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1599,11 +1599,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "1.2 1.4 0.7 0.1 0.7 0.6 1.1 1.8 –0.6 1.5 2.0" + "text": "–0.2 –0.2 –0.2 –0.1 0.0 –0.4 –0.2 –0.4 0.3 –0.1 –0.2" }, { - "type": "UncategorizedText", - "element_id": "6976f35f9f91b539b46743f37d94014a", + "type": "Title", + "element_id": "6185fd66a4e106814e65c047c15dfb1f", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1617,11 +1617,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "2.7 2.0 3.5 1.9 2.6 3.9 5.2 1.4 4.1 3.5 2.8" + "text": "Advanced Economies United States Euro Area" }, { "type": "UncategorizedText", - "element_id": "2f6f72296f8ab115fda4292808436b88", + "element_id": "d8236eb6a9bab4f3d37735048ab5aeee", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1635,11 +1635,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "–0.2 –0.2 –0.2 –0.1 0.0 –0.4 –0.2 –0.4 0.3 –0.1 –0.2" + "text": "1.2 1.4 0.7 0.1 0.7 0.6 1.1 1.8 –0.6 1.5 2.0" }, { "type": "UncategorizedText", - "element_id": "f22875edf393e3502ad60c82e81c5933", + "element_id": "777e0063772d428bf1c04383b8ad058e", "metadata": 
{ "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1653,7 +1653,7 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "0.1 0.4 0.2 0.4 0.0 0.8 –0.1 0.2 –0.9 0.0 –0.3" + "text": "1.4 1.0 1.6 1.4 1.6 0.9 2.4 0.9 0.9 1.5 2.4" }, { "type": "Title", @@ -1692,8 +1692,8 @@ "text": "Japan United Kingdom Canada Other Advanced Economies 3/" }, { - "type": "Title", - "element_id": "a4ca51cd6c74adf51f6e9ce60165d047", + "type": "UncategorizedText", + "element_id": "07adb8acdd66b5d2490e542ae0604b71", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1707,11 +1707,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "Emerging Market and Developing Economies Emerging and Developing Asia" + "text": "4.1 4.9 4.1 7.1 2.8 2.0 1.9 2.2 1.9 . . . 3.5 . . . 2.9 1.8" }, { "type": "UncategorizedText", - "element_id": "f4e79a2ba19a5b842cff288f8e4eafd0", + "element_id": "a7143daa9de8af6e0c465ca1354d45b6", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1725,7 +1725,7 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "5.0 6.2 5.9 7.0 3.5 1.0 1.9 0.8 1.1 . . . 2.7 . . . 
3.1 0.5" + "text": "6.7 7.4 8.4 8.7 6.9 4.7 7.0 5.0 4.7 4.5 3.2 4.7 3.6 4.9" }, { "type": "UncategorizedText", @@ -1747,7 +1747,7 @@ }, { "type": "UncategorizedText", - "element_id": "1ea8f3c3db2cb6c75f21ebf26acc28a5", + "element_id": "53bcbc5ff007dd49a07f6fb79ef96ef9", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1761,7 +1761,7 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "4.0 5.3 5.2 6.1 1.5 0.3 1.8 1.2 1.7 3.2 2.6 3.8 3.2 1.2" + "text": "3.9 4.3 3.0 6.8 0.7 –2.2 3.9 3.1 3.1 5.3 8.7 3.8 3.0 2.6" }, { "type": "UncategorizedText", @@ -1782,8 +1782,8 @@ "text": "–0.1 0.0 0.0 0.0 0.1 0.6 –0.3 –0.4 –0.2 0.2 0.5 0.0 0.0 0.0" }, { - "type": "UncategorizedText", - "element_id": "9d1bc5abd6f3e9c4c6ccb572ae521387", + "type": "Title", + "element_id": "a4ca51cd6c74adf51f6e9ce60165d047", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1797,11 +1797,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "4.2 5.2 4.5 6.8 2.6 2.1 2.1 1.5 1.6 3.7 3.4 4.1 2.9 1.3" + "text": "Emerging Market and Developing Economies Emerging and Developing Asia" }, { "type": "UncategorizedText", - "element_id": "53bcbc5ff007dd49a07f6fb79ef96ef9", + "element_id": "d7b26ee43ca5481505ca9eb7c3b29b2c", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1815,11 +1815,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "3.9 4.3 3.0 6.8 0.7 –2.2 3.9 3.1 3.1 5.3 8.7 3.8 3.0 2.6" + "text": "2.5 3.4 2.9 4.3 –2.0 –4.1 2.6 2.8 3.7 . . . 4.6 . . . 
2.6 3.0" }, { "type": "UncategorizedText", - "element_id": "d7b26ee43ca5481505ca9eb7c3b29b2c", + "element_id": "9d1bc5abd6f3e9c4c6ccb572ae521387", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1833,11 +1833,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "2.5 3.4 2.9 4.3 –2.0 –4.1 2.6 2.8 3.7 . . . 4.6 . . . 2.6 3.0" + "text": "4.2 5.2 4.5 6.8 2.6 2.1 2.1 1.5 1.6 3.7 3.4 4.1 2.9 1.3" }, { "type": "UncategorizedText", - "element_id": "a7143daa9de8af6e0c465ca1354d45b6", + "element_id": "1ea8f3c3db2cb6c75f21ebf26acc28a5", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1851,11 +1851,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "6.7 7.4 8.4 8.7 6.9 4.7 7.0 5.0 4.7 4.5 3.2 4.7 3.6 4.9" + "text": "4.0 5.3 5.2 6.1 1.5 0.3 1.8 1.2 1.7 3.2 2.6 3.8 3.2 1.2" }, { "type": "UncategorizedText", - "element_id": "07adb8acdd66b5d2490e542ae0604b71", + "element_id": "f4e79a2ba19a5b842cff288f8e4eafd0", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1869,7 +1869,7 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "4.1 4.9 4.1 7.1 2.8 2.0 1.9 2.2 1.9 . . . 3.5 . . . 2.9 1.8" + "text": "5.0 6.2 5.9 7.0 3.5 1.0 1.9 0.8 1.1 . . . 2.7 . . . 3.1 0.5" }, { "type": "Title", @@ -2035,7 +2035,7 @@ }, { "type": "UncategorizedText", - "element_id": "4d5d14d8c932363fe84036564c6c582b", + "element_id": "39b99440eae2f9ee75cf98100c285787", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -2049,11 +2049,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "1.7 1.8 3.7 . . . 2.5 . . ." + "text": "2.5 2.0 4.0 . . . 4.1 . . ." 
}, { "type": "UncategorizedText", - "element_id": "39b99440eae2f9ee75cf98100c285787", + "element_id": "98e45a005510dc136e14094ee7ed7faf", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -2067,11 +2067,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "2.5 2.0 4.0 . . . 4.1 . . ." + "text": "2.5 1.2 5.7 . . . 5.0 . . ." }, { "type": "UncategorizedText", - "element_id": "dbc6d298b0672b8176de90a623844b7f", + "element_id": "effb80722a72ecff482b7a0d4a027e78", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -2085,11 +2085,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "6.0 5.5 3.8 4.1 7.0 4.1" + "text": "0.3 0.0 –0.2 –0.4 0.4 0.0" }, { "type": "UncategorizedText", - "element_id": "effb80722a72ecff482b7a0d4a027e78", + "element_id": "123157612cd26d61b4760a5ecd1f4bfc", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -2103,11 +2103,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "0.3 0.0 –0.2 –0.4 0.4 0.0" + "text": "2.5 1.8 4.7 3.5 4.1 5.6" }, { "type": "UncategorizedText", - "element_id": "98e45a005510dc136e14094ee7ed7faf", + "element_id": "dbc6d298b0672b8176de90a623844b7f", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -2121,11 +2121,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "2.5 1.2 5.7 . . . 5.0 . . ." 
+ "text": "6.0 5.5 3.8 4.1 7.0 4.1" }, { "type": "UncategorizedText", - "element_id": "123157612cd26d61b4760a5ecd1f4bfc", + "element_id": "743f3bc42f087068035515a8dec4f85a", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -2139,7 +2139,7 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "2.5 1.8 4.7 3.5 4.1 5.6" + "text": "3.1 3.7 5.2 5.4 3.8 4.9" }, { "type": "UncategorizedText", @@ -2161,7 +2161,7 @@ }, { "type": "UncategorizedText", - "element_id": "037023840d334f9f357a6c3da2b058ff", + "element_id": "4d5d14d8c932363fe84036564c6c582b", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -2175,11 +2175,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "–0.1 –0.3 –0.2 0.2 0.0 0.1" + "text": "1.7 1.8 3.7 . . . 2.5 . . ." }, { "type": "UncategorizedText", - "element_id": "743f3bc42f087068035515a8dec4f85a", + "element_id": "037023840d334f9f357a6c3da2b058ff", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -2193,11 +2193,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "3.1 3.7 5.2 5.4 3.8 4.9" + "text": "–0.1 –0.3 –0.2 0.2 0.0 0.1" }, { "type": "UncategorizedText", - "element_id": "0c76bc4e35219e2a31b09428cd47d009", + "element_id": "e352203d837b1096ee96e1977f1c3d0b", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -2211,7 +2211,7 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "World Trade Volume (goods and services) 6/ Advanced Economies Emerging Market and Developing Economies" + "text": "5.4 6.6 3.4" }, { "type": "UncategorizedText", @@ -2233,7 +2233,7 @@ }, { "type": "UncategorizedText", - "element_id": "7fdc64e781146808df57eac112860f9b", + "element_id": "e4fe15854d6650b5b102d8b1c11eb0ba", "metadata": { "data_source": { "url": 
"s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -2247,7 +2247,7 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "3.4 2.7 4.6" + "text": "10.4 9.4 12.1" }, { "type": "UncategorizedText", @@ -2269,7 +2269,7 @@ }, { "type": "UncategorizedText", - "element_id": "708c57a76a5cf81dc197cc1bd612adb2", + "element_id": "7fdc64e781146808df57eac112860f9b", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -2283,11 +2283,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": ". . . . . . . . ." + "text": "3.4 2.7 4.6" }, { "type": "UncategorizedText", - "element_id": "e4fe15854d6650b5b102d8b1c11eb0ba", + "element_id": "708c57a76a5cf81dc197cc1bd612adb2", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -2301,7 +2301,7 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "10.4 9.4 12.1" + "text": ". . . . . . . . ." 
}, { "type": "UncategorizedText", @@ -2359,7 +2359,7 @@ }, { "type": "UncategorizedText", - "element_id": "e352203d837b1096ee96e1977f1c3d0b", + "element_id": "0c76bc4e35219e2a31b09428cd47d009", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -2373,7 +2373,7 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "5.4 6.6 3.4" + "text": "World Trade Volume (goods and services) 6/ Advanced Economies Emerging Market and Developing Economies" }, { "type": "NarrativeText", @@ -2395,7 +2395,7 @@ }, { "type": "UncategorizedText", - "element_id": "7268a41308c4276447de2a707b5df73c", + "element_id": "b432234c878eb484525dbb0c9be461fe", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -2409,7 +2409,7 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "–16.2 –6.3" + "text": "65.8 26.4" }, { "type": "UncategorizedText", @@ -2431,7 +2431,7 @@ }, { "type": "UncategorizedText", - "element_id": "3d5c2c97e00e0c5be2a870cf1cbaac06", + "element_id": "7268a41308c4276447de2a707b5df73c", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -2445,11 +2445,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "11.2 –2.0" + "text": "–16.2 –6.3" }, { "type": "UncategorizedText", - "element_id": "84bc47d0d0703878a250620230630525", + "element_id": "cf39ab5ed0773cea3681c2ac35e6b706", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -2463,11 +2463,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "–3.3 –0.1" + "text": "–7.1 –0.4" }, { "type": "UncategorizedText", - "element_id": "cf39ab5ed0773cea3681c2ac35e6b706", + "element_id": "84bc47d0d0703878a250620230630525", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -2481,11 
+2481,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "–7.1 –0.4" + "text": "–3.3 –0.1" }, { "type": "UncategorizedText", - "element_id": "301b9fd38725258f32816ff1a855be3e", + "element_id": "ebb1568088af8b7c7b98878b895decaf", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -2499,11 +2499,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "–5.9 –0.2" + "text": "–0.9 0.3" }, { "type": "UncategorizedText", - "element_id": "b432234c878eb484525dbb0c9be461fe", + "element_id": "3d5c2c97e00e0c5be2a870cf1cbaac06", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -2517,11 +2517,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "65.8 26.4" + "text": "11.2 –2.0" }, { "type": "UncategorizedText", - "element_id": "ebb1568088af8b7c7b98878b895decaf", + "element_id": "4150b86a3fffd48fc159e81c9b7325db", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -2535,11 +2535,11 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "–0.9 0.3" + "text": "–9.8 1.4" }, { "type": "UncategorizedText", - "element_id": "4150b86a3fffd48fc159e81c9b7325db", + "element_id": "301b9fd38725258f32816ff1a855be3e", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -2553,7 +2553,7 @@ "filetype": "application/pdf", "page_number": 7 }, - "text": "–9.8 1.4" + "text": "–5.9 –0.2" }, { "type": "UncategorizedText", @@ -2935,7 +2935,7 @@ }, { "type": "ListItem", - "element_id": "e3b0c44298fc1c149afbf4c8996fb924", + "element_id": "2d14934d52ff357c52e9ae1c38f7390e", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -2949,11 +2949,11 @@ "filetype": "application/pdf", "page_number": 8 }, - "text": "" + "text": "Debt distress: Since 
October, sovereign spreads for emerging market and developing economies have modestly declined on the back of an easing in global financial conditions (Box 1) and dollar depreciation. About 15 percent of low-income countries are estimated to be in debt distress, with an additional 45 percent at high risk of debt distress and about 25 percent of emerging market economies also at high risk. The combination of high debt levels from the pandemic, lower growth, and higher borrowing costs exacerbates the vulnerability of these economies, especially those with significant near-term dollar financing needs. Inflation persisting: Persistent labor market tightness could translate into stronger-than-expected wage growth. Higher-than-expected oil, gas, and food prices from the war in Ukraine or from a faster rebound in China’s growth could again raise headline inflation and pass through into underlying inflation. Such developments could cause inflation expectations to de-anchor and require an even tighter monetary policy." }, { "type": "ListItem", - "element_id": "2d14934d52ff357c52e9ae1c38f7390e", + "element_id": "e3b0c44298fc1c149afbf4c8996fb924", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -2967,7 +2967,7 @@ "filetype": "application/pdf", "page_number": 8 }, - "text": "Debt distress: Since October, sovereign spreads for emerging market and developing economies have modestly declined on the back of an easing in global financial conditions (Box 1) and dollar depreciation. About 15 percent of low-income countries are estimated to be in debt distress, with an additional 45 percent at high risk of debt distress and about 25 percent of emerging market economies also at high risk. The combination of high debt levels from the pandemic, lower growth, and higher borrowing costs exacerbates the vulnerability of these economies, especially those with significant near-term dollar financing needs. 
Inflation persisting: Persistent labor market tightness could translate into stronger-than-expected wage growth. Higher-than-expected oil, gas, and food prices from the war in Ukraine or from a faster rebound in China’s growth could again raise headline inflation and pass through into underlying inflation. Such developments could cause inflation expectations to de-anchor and require an even tighter monetary policy." + "text": "" }, { "type": "ListItem", diff --git a/test_unstructured_ingest/expected-structured-output/s3/small-pdf-set/Silent-Giant-(1).pdf.json b/test_unstructured_ingest/expected-structured-output/s3/small-pdf-set/Silent-Giant-(1).pdf.json index 8bb8ab6306..9bfb00cf8a 100644 --- a/test_unstructured_ingest/expected-structured-output/s3/small-pdf-set/Silent-Giant-(1).pdf.json +++ b/test_unstructured_ingest/expected-structured-output/s3/small-pdf-set/Silent-Giant-(1).pdf.json @@ -342,8 +342,8 @@ "text": "Electricity is central to modern life – it powers our daily lives, as well as our dreams and ambitions. Demand has grown steadily for more than 100 years, and will continue to do so as many parts of the world continue to develop, and electrification takes a central role in efforts to decarbonize (Figure 1). With nearly a billion people around the world still living in the dark, without access to electricity, humanity has a responsibility to learn from the past - everyone has the right to enjoy a modern lifestyle in a way that does not cause harm to people or the planet." 
}, { - "type": "UncategorizedText", - "element_id": "b4af08fb653ae7dea99f3a48c2ff7f5d", + "type": "Title", + "element_id": "563a2980d46c81119e1d7d952b375a41", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -357,11 +357,11 @@ "filetype": "application/pdf", "page_number": 4 }, - "text": "45,000" + "text": "h W T" }, { - "type": "Title", - "element_id": "563a2980d46c81119e1d7d952b375a41", + "type": "UncategorizedText", + "element_id": "b4af08fb653ae7dea99f3a48c2ff7f5d", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -375,7 +375,7 @@ "filetype": "application/pdf", "page_number": 4 }, - "text": "h W T" + "text": "45,000" }, { "type": "Image", @@ -1097,6 +1097,24 @@ }, "text": "140" }, + { + "type": "Image", + "element_id": "0fece208b80790baa3ae323ace21f818", + "metadata": { + "data_source": { + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": 177372694731575984083482917563244941766, + "record_locator": { + "protocol": "s3", + "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + }, + "date_modified": "2023-02-12T10:10:36" + }, + "filetype": "application/pdf", + "page_number": 8 + }, + "text": " 140 120 120 1 : 100 99.5 : 80 71.9 1 n 60 . 
1 40 : “99 : 85 7g 0245 <0.01 0 : : : > S & 3} cs s\\ é fos < < Qg eS S ew ee © RS Rs ~a S Se fe) we" + }, { "type": "NarrativeText", "element_id": "e11247712b3df61756970b45f019ad68", @@ -1205,24 +1223,6 @@ }, "text": "a t a F" }, - { - "type": "Image", - "element_id": "0fece208b80790baa3ae323ace21f818", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": 177372694731575984083482917563244941766, - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 8 - }, - "text": " 140 120 120 1 : 100 99.5 : 80 71.9 1 n 60 . 1 40 : “99 : 85 7g 0245 <0.01 0 : : : > S & 3} cs s\\ é fos < < Qg eS S ew ee © RS Rs ~a S Se fe) we" - }, { "type": "FigureCaption", "element_id": "445676822969fb5177c0081d07449a70", @@ -1279,7 +1279,7 @@ }, { "type": "UncategorizedText", - "element_id": "ad57366865126e55649ecb23ae1d4888", + "element_id": "bbf3f11cb5b43e700273a78d12de55e4", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -1293,11 +1293,11 @@ "filetype": "application/pdf", "page_number": 8 }, - "text": "100" + "text": "%" }, { "type": "UncategorizedText", - "element_id": "bbf3f11cb5b43e700273a78d12de55e4", + "element_id": "ad57366865126e55649ecb23ae1d4888", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -1311,7 +1311,7 @@ "filetype": "application/pdf", "page_number": 8 }, - "text": "%" + "text": "100" }, { "type": "Image", @@ -1422,8 +1422,8 @@ "text": "Figure 5. 
The importance of nuclear in ensuring clean energy systems in France, Sweden and Switzerland ix" }, { - "type": "Title", - "element_id": "563a2980d46c81119e1d7d952b375a41", + "type": "Image", + "element_id": "77d8044f595648ff9853b27fadd6ef94", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -1437,11 +1437,11 @@ "filetype": "application/pdf", "page_number": 9 }, - "text": "h W T" + "text": " BB Non-hydro 500 i ren. & waste 400 z= Nuclear Natural gas 300 y -— EB Hydro i oil 200 —— -— BB Coal 100" }, { - "type": "Image", - "element_id": "77d8044f595648ff9853b27fadd6ef94", + "type": "Title", + "element_id": "563a2980d46c81119e1d7d952b375a41", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -1455,7 +1455,7 @@ "filetype": "application/pdf", "page_number": 9 }, - "text": " BB Non-hydro 500 i ren. & waste 400 z= Nuclear Natural gas 300 y -— EB Hydro i oil 200 —— -— BB Coal 100" + "text": "h W T" }, { "type": "FigureCaption", diff --git a/test_unstructured_ingest/expected-structured-output/s3/small-pdf-set/recalibrating-risk-report.pdf.json b/test_unstructured_ingest/expected-structured-output/s3/small-pdf-set/recalibrating-risk-report.pdf.json index 9f4ebb6871..fddc9f49d5 100644 --- a/test_unstructured_ingest/expected-structured-output/s3/small-pdf-set/recalibrating-risk-report.pdf.json +++ b/test_unstructured_ingest/expected-structured-output/s3/small-pdf-set/recalibrating-risk-report.pdf.json @@ -270,8 +270,8 @@ "text": "It is widely accepted that humans have skewed perceptions of risks, and the way we respond to them is shaped by these perceptions, rather than the actual threats posed. Approximately 1.35 millioni people die every year because of traffic accidents, in comparison with 257 aviation fatalities in 2019ii, yet more people are nervous about flying, fearing a rare deadly crash, than being in a fatal traffic accident. 
These numbers tell a powerful and well-established story: evaluations of risk are largely the result of emotions, rather than logic or facts. Although it is hard to recognize and accept that our perceptions may mislead us and curtail effective decision making, this is a well-established characteristic of humanity." }, { - "type": "Title", - "element_id": "d977fff4c69c437aa4a44a5c5f4bf02e", + "type": "NarrativeText", + "element_id": "45e9c81bf6ccdc498a6ac5640d786736", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -285,11 +285,11 @@ "filetype": "application/pdf", "page_number": 4 }, - "text": "Rank Order Laypersons" + "text": "Nuclear energy and the risk of radiation is one of the most extreme cases in which perceived and actual risks have diverged. The fear of radiation, whilst pre- dating the Second World War, was firmly established by the debate on the potential impacts of low-dose radiation from the fallout from nuclear weapons testing in the early years of the Cold War. Radiation in many ways became linked with the mental imagery of nuclear war, playing an important role in increasing public concern about radiation and its health effects. There is a well-established discrepancy between fact-based risk assessments and public perception of different risks. This is very much the case with nuclear power, and this is clearly highlighted in Figure 1, with laypersons ranking nuclear power as the highest risk out of 30 activities and technologies, with experts ranking nuclear as 20th. In many ways, popular culture’s depiction of radiation has played a role in ensuring that this discrepancy has remained, be it Godzilla, The Incredible Hulk, or The Simpsons, which regularly plays on the notion of radiation from nuclear power plants causing three-eyed fish, something that has been firmly rejected as unscientific." 
}, { "type": "Title", - "element_id": "5e12750596bdf1413e64c24997479b21", + "element_id": "d977fff4c69c437aa4a44a5c5f4bf02e", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -303,11 +303,11 @@ "filetype": "application/pdf", "page_number": 4 }, - "text": "Experts" + "text": "Rank Order Laypersons" }, { - "type": "NarrativeText", - "element_id": "45e9c81bf6ccdc498a6ac5640d786736", + "type": "Table", + "element_id": "07e04cdff751f52e042c08c1b265b6f5", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -321,11 +321,11 @@ "filetype": "application/pdf", "page_number": 4 }, - "text": "Nuclear energy and the risk of radiation is one of the most extreme cases in which perceived and actual risks have diverged. The fear of radiation, whilst pre- dating the Second World War, was firmly established by the debate on the potential impacts of low-dose radiation from the fallout from nuclear weapons testing in the early years of the Cold War. Radiation in many ways became linked with the mental imagery of nuclear war, playing an important role in increasing public concern about radiation and its health effects. There is a well-established discrepancy between fact-based risk assessments and public perception of different risks. This is very much the case with nuclear power, and this is clearly highlighted in Figure 1, with laypersons ranking nuclear power as the highest risk out of 30 activities and technologies, with experts ranking nuclear as 20th. In many ways, popular culture’s depiction of radiation has played a role in ensuring that this discrepancy has remained, be it Godzilla, The Incredible Hulk, or The Simpsons, which regularly plays on the notion of radiation from nuclear power plants causing three-eyed fish, something that has been firmly rejected as unscientific." 
+ "text": "_Laypersons Experts 1 2 3 Handguns 4 + Nuclear power 20 Motor vehicles 1 4 Smoking 2 17 Electric power (non-nuclear) 9 1 | + + 22 xrays 7 30 Vaccinations 25" }, { - "type": "Table", - "element_id": "07e04cdff751f52e042c08c1b265b6f5", + "type": "Title", + "element_id": "5e12750596bdf1413e64c24997479b21", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -339,7 +339,7 @@ "filetype": "application/pdf", "page_number": 4 }, - "text": "_Laypersons Experts 1 2 3 Handguns 4 + Nuclear power 20 Motor vehicles 1 4 Smoking 2 17 Electric power (non-nuclear) 9 1 | + + 22 xrays 7 30 Vaccinations 25" + "text": "Experts" }, { "type": "UncategorizedText", @@ -360,8 +360,8 @@ "text": "20" }, { - "type": "UncategorizedText", - "element_id": "6b86b273ff34fce19d6b804eff5a3f57", + "type": "Title", + "element_id": "82a60569029ed9032f1b08891e8524c2", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -375,11 +375,11 @@ "filetype": "application/pdf", "page_number": 4 }, - "text": "1" + "text": "Nuclear power" }, { - "type": "Title", - "element_id": "82a60569029ed9032f1b08891e8524c2", + "type": "UncategorizedText", + "element_id": "6b86b273ff34fce19d6b804eff5a3f57", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -393,11 +393,11 @@ "filetype": "application/pdf", "page_number": 4 }, - "text": "Nuclear power" + "text": "1" }, { - "type": "Title", - "element_id": "602d25f25cca4ebb709f8b48f54d99d9", + "type": "UncategorizedText", + "element_id": "6b86b273ff34fce19d6b804eff5a3f57", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -411,11 +411,11 @@ "filetype": "application/pdf", "page_number": 4 }, - "text": "Motor vehicles" + "text": "1" }, { - "type": "UncategorizedText", - "element_id": "d4735e3a265e16eee03f59718b9b5d03", 
+ "type": "Title", + "element_id": "602d25f25cca4ebb709f8b48f54d99d9", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -429,11 +429,11 @@ "filetype": "application/pdf", "page_number": 4 }, - "text": "2" + "text": "Motor vehicles" }, { "type": "UncategorizedText", - "element_id": "6b86b273ff34fce19d6b804eff5a3f57", + "element_id": "d4735e3a265e16eee03f59718b9b5d03", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -447,7 +447,7 @@ "filetype": "application/pdf", "page_number": 4 }, - "text": "1" + "text": "2" }, { "type": "UncategorizedText", @@ -523,7 +523,7 @@ }, { "type": "UncategorizedText", - "element_id": "4b227777d4dd1fc61c6f884f48641d02", + "element_id": "d4735e3a265e16eee03f59718b9b5d03", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -537,11 +537,11 @@ "filetype": "application/pdf", "page_number": 4 }, - "text": "4" + "text": "2" }, { "type": "UncategorizedText", - "element_id": "d4735e3a265e16eee03f59718b9b5d03", + "element_id": "4b227777d4dd1fc61c6f884f48641d02", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -555,7 +555,7 @@ "filetype": "application/pdf", "page_number": 4 }, - "text": "2" + "text": "4" }, { "type": "UncategorizedText", @@ -594,8 +594,8 @@ "text": "" }, { - "type": "Title", - "element_id": "1656c455012b016fbac5eac0a38397bd", + "type": "UncategorizedText", + "element_id": "4523540f1504cd17100c4835e85b7eef", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -609,11 +609,11 @@ "filetype": "application/pdf", "page_number": 4 }, - "text": "Electric power (non-nuclear)" + "text": "17" }, { "type": "UncategorizedText", - "element_id": "4523540f1504cd17100c4835e85b7eef", + "element_id": 
"19581e27de7ced00ff1ce50b2047e7a5", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -627,11 +627,11 @@ "filetype": "application/pdf", "page_number": 4 }, - "text": "17" + "text": "9" }, { - "type": "UncategorizedText", - "element_id": "19581e27de7ced00ff1ce50b2047e7a5", + "type": "Title", + "element_id": "1656c455012b016fbac5eac0a38397bd", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -645,7 +645,7 @@ "filetype": "application/pdf", "page_number": 4 }, - "text": "9" + "text": "Electric power (non-nuclear)" }, { "type": "UncategorizedText", @@ -702,8 +702,8 @@ "text": "22" }, { - "type": "UncategorizedText", - "element_id": "7902699be42c8a8e46fbbb4501726517", + "type": "Title", + "element_id": "2f3122790ccc9e095abe1b5ceedddf88", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -717,11 +717,11 @@ "filetype": "application/pdf", "page_number": 4 }, - "text": "7" + "text": "X-rays" }, { - "type": "Title", - "element_id": "2f3122790ccc9e095abe1b5ceedddf88", + "type": "UncategorizedText", + "element_id": "7902699be42c8a8e46fbbb4501726517", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -735,7 +735,7 @@ "filetype": "application/pdf", "page_number": 4 }, - "text": "X-rays" + "text": "7" }, { "type": "UncategorizedText", @@ -774,8 +774,8 @@ "text": "" }, { - "type": "Title", - "element_id": "ed3861e631428b9b77e2bdc0384d2cbe", + "type": "UncategorizedText", + "element_id": "624b60c58c9d8bfb6ff1886c2fd605d2", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -789,11 +789,11 @@ "filetype": "application/pdf", "page_number": 4 }, - "text": "Vaccinations" + "text": "30" }, { - "type": "UncategorizedText", - "element_id": 
"624b60c58c9d8bfb6ff1886c2fd605d2", + "type": "Title", + "element_id": "ed3861e631428b9b77e2bdc0384d2cbe", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -807,7 +807,7 @@ "filetype": "application/pdf", "page_number": 4 }, - "text": "30" + "text": "Vaccinations" }, { "type": "UncategorizedText", @@ -882,8 +882,8 @@ "text": "1 The original study was published in 1978, but its findings have been confirmed by numerous studies since." }, { - "type": "NarrativeText", - "element_id": "e11247712b3df61756970b45f019ad68", + "type": "Image", + "element_id": "aa493f4c5f573e209dc5e56d5e2a341f", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -897,11 +897,11 @@ "filetype": "application/pdf", "page_number": 5 }, - "text": "r a e y" + "text": "Natural Artificial @ 48% Radon @ 11% Medicine @ 14% Buildings & soil @ 0.4% = Fallout @ 12% Food & water @ 0.4% Miscellaneous @ 10% Cosmic @ 0.2% Occupational @ 4% = Thoron @ 0.04% Nuclear discharges " }, { - "type": "Title", - "element_id": "3f79bb7b435b05321651daefd374cdc6", + "type": "FigureCaption", + "element_id": "9b657ab0d2ea482c887c7877ba86598d", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -915,11 +915,11 @@ "filetype": "application/pdf", "page_number": 5 }, - "text": "e" + "text": "Figure 2. 
Global average exposure from different sources of radiation" }, { - "type": "Title", - "element_id": "f83714d89302473e0e4f5399bd50e7a9", + "type": "NarrativeText", + "element_id": "4469b98946c004fbae47ad6285c9bba4", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -933,11 +933,11 @@ "filetype": "application/pdf", "page_number": 5 }, - "text": "W T" + "text": "Fossil fuels – currently accounting for around 81% of total energy supplyiv – cause significant levels of emissions in terms of both greenhouse gases and air pollutants. Despite the serious and ongoing health and environmental harms caused by air pollution, it is often considered to be an inevitable consequence of economic development. Air pollution’s contribution to the burden of disease is profound, with an estimated 8.7 million people dying worldwide prematurely in 2018 alonev,vi. Despite this, it fails to induce the same fears and anxieties in people as nuclear energy does." }, { "type": "NarrativeText", - "element_id": "f9bb49945b60897227abdd75b5f8d39b", + "element_id": "cbf390f564b0b1197deb5bf3dd999291", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -951,11 +951,11 @@ "filetype": "application/pdf", "page_number": 5 }, - "text": "r e p s e i t i l" + "text": "In terms of accidents, hydropower is the deadliest electricity generator, mostly due to collapsing dams and the consequences of flooding. The Banqiao Dam failure in 1975 led to at least 26,000 people drowning, and as many as 150,000 deaths resulting from the secondary effects of the accident. In comparison, radiation exposure following Chernobyl caused 54 deaths2, while no casualties due to radiation are likely to occur from the accident at Fukushima Daiichi." 
}, { - "type": "Title", - "element_id": "1fb2ec4fc8fc547c0de86ba79ba651e5", + "type": "NarrativeText", + "element_id": "e11247712b3df61756970b45f019ad68", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -969,11 +969,11 @@ "filetype": "application/pdf", "page_number": 5 }, - "text": "a t a F" + "text": "r a e y" }, { - "type": "Image", - "element_id": "aa493f4c5f573e209dc5e56d5e2a341f", + "type": "Title", + "element_id": "3f79bb7b435b05321651daefd374cdc6", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -987,11 +987,11 @@ "filetype": "application/pdf", "page_number": 5 }, - "text": "Natural Artificial @ 48% Radon @ 11% Medicine @ 14% Buildings & soil @ 0.4% = Fallout @ 12% Food & water @ 0.4% Miscellaneous @ 10% Cosmic @ 0.2% Occupational @ 4% = Thoron @ 0.04% Nuclear discharges " + "text": "e" }, { - "type": "FigureCaption", - "element_id": "9b657ab0d2ea482c887c7877ba86598d", + "type": "Title", + "element_id": "f83714d89302473e0e4f5399bd50e7a9", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -1005,11 +1005,11 @@ "filetype": "application/pdf", "page_number": 5 }, - "text": "Figure 2. Global average exposure from different sources of radiation" + "text": "W T" }, { "type": "NarrativeText", - "element_id": "4469b98946c004fbae47ad6285c9bba4", + "element_id": "f9bb49945b60897227abdd75b5f8d39b", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -1023,11 +1023,11 @@ "filetype": "application/pdf", "page_number": 5 }, - "text": "Fossil fuels – currently accounting for around 81% of total energy supplyiv – cause significant levels of emissions in terms of both greenhouse gases and air pollutants. 
Despite the serious and ongoing health and environmental harms caused by air pollution, it is often considered to be an inevitable consequence of economic development. Air pollution’s contribution to the burden of disease is profound, with an estimated 8.7 million people dying worldwide prematurely in 2018 alonev,vi. Despite this, it fails to induce the same fears and anxieties in people as nuclear energy does." + "text": "r e p s e i t i l" }, { - "type": "NarrativeText", - "element_id": "cbf390f564b0b1197deb5bf3dd999291", + "type": "Title", + "element_id": "1fb2ec4fc8fc547c0de86ba79ba651e5", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -1041,7 +1041,7 @@ "filetype": "application/pdf", "page_number": 5 }, - "text": "In terms of accidents, hydropower is the deadliest electricity generator, mostly due to collapsing dams and the consequences of flooding. The Banqiao Dam failure in 1975 led to at least 26,000 people drowning, and as many as 150,000 deaths resulting from the secondary effects of the accident. In comparison, radiation exposure following Chernobyl caused 54 deaths2, while no casualties due to radiation are likely to occur from the accident at Fukushima Daiichi." 
+ "text": "a t a F" }, { "type": "UncategorizedText", @@ -1639,7 +1639,7 @@ }, { "type": "Title", - "element_id": "de7d1b721a1e0632b7cf04edf5032c8e", + "element_id": "e56261e0bd30965b8e68ed2abb15b141", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -1653,11 +1653,11 @@ "filetype": "application/pdf", "page_number": 10 }, - "text": "i" + "text": "References" }, { - "type": "Title", - "element_id": "5d7f49449ab22deac22d767b89549c55", + "type": "ListItem", + "element_id": "c06ac75f019ceac1ff2baecfc090fd3e", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -1671,11 +1671,11 @@ "filetype": "application/pdf", "page_number": 10 }, - "text": "ii" + "text": "World Health Organization (2020). Road traffic injuries. Available at: https://www.who.int/news-room/fact-sheets/ detail/road-traffic-injuries" }, { "type": "Title", - "element_id": "4c94485e0c21ae6c41ce1dfe7b6bface", + "element_id": "de7d1b721a1e0632b7cf04edf5032c8e", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -1689,11 +1689,11 @@ "filetype": "application/pdf", "page_number": 10 }, - "text": "v" + "text": "i" }, { - "type": "Title", - "element_id": "c0ff93ea8927a7366db0331e5fd9d19f", + "type": "ListItem", + "element_id": "199440a0821e16b612f4697aa2306cb2", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -1707,11 +1707,11 @@ "filetype": "application/pdf", "page_number": 10 }, - "text": "vi" + "text": "BBC (2020). Plane crash fatalities fell more than 50% in 2019. 
Available at: https://www.bbc.co.uk/news/ business-50953712" }, { "type": "Title", - "element_id": "e56261e0bd30965b8e68ed2abb15b141", + "element_id": "5d7f49449ab22deac22d767b89549c55", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -1725,11 +1725,11 @@ "filetype": "application/pdf", "page_number": 10 }, - "text": "References" + "text": "ii" }, { "type": "ListItem", - "element_id": "c06ac75f019ceac1ff2baecfc090fd3e", + "element_id": "18b2cdcbf43cbcab942c6ffa69abdc51", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -1743,11 +1743,11 @@ "filetype": "application/pdf", "page_number": 10 }, - "text": "World Health Organization (2020). Road traffic injuries. Available at: https://www.who.int/news-room/fact-sheets/ detail/road-traffic-injuries" + "text": "Slovic, P., 2010. The Psychology of risk. Saúde e Sociedade, 19(4), pp. 731-747." }, { "type": "ListItem", - "element_id": "199440a0821e16b612f4697aa2306cb2", + "element_id": "6febbd0bffa8633c6c188165767c843c", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -1761,11 +1761,11 @@ "filetype": "application/pdf", "page_number": 10 }, - "text": "BBC (2020). Plane crash fatalities fell more than 50% in 2019. Available at: https://www.bbc.co.uk/news/ business-50953712" + "text": "United Nations Scientific Committee on the Effects of Radiation (2016). Report of the United Nations Scientific Committee on the Effects of Atomic Radiation. 
Accessed from: https:/Avww.unscear.org/docs/publications/2016/ UNSCEAR_2016_GA-Report-CORR.pdf" }, { "type": "ListItem", - "element_id": "18b2cdcbf43cbcab942c6ffa69abdc51", + "element_id": "81be06e67a1b533cb1278b15860c51db", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -1779,11 +1779,11 @@ "filetype": "application/pdf", "page_number": 10 }, - "text": "Slovic, P., 2010. The Psychology of risk. Saúde e Sociedade, 19(4), pp. 731-747." + "text": "International Energy Agency (2020). Global share of total energy supply by source, 2018. Key World Energy Statistics 2020. Available at: https://www.iea.org/data-and-statistics/charts/global-share-of-total-energy-supply-by- source-2018" }, { - "type": "ListItem", - "element_id": "6febbd0bffa8633c6c188165767c843c", + "type": "Title", + "element_id": "4c94485e0c21ae6c41ce1dfe7b6bface", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -1797,11 +1797,11 @@ "filetype": "application/pdf", "page_number": 10 }, - "text": "United Nations Scientific Committee on the Effects of Radiation (2016). Report of the United Nations Scientific Committee on the Effects of Atomic Radiation. Accessed from: https:/Avww.unscear.org/docs/publications/2016/ UNSCEAR_2016_GA-Report-CORR.pdf" + "text": "v" }, { - "type": "ListItem", - "element_id": "81be06e67a1b533cb1278b15860c51db", + "type": "Title", + "element_id": "c0ff93ea8927a7366db0331e5fd9d19f", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", @@ -1815,7 +1815,7 @@ "filetype": "application/pdf", "page_number": 10 }, - "text": "International Energy Agency (2020). Global share of total energy supply by source, 2018. Key World Energy Statistics 2020. 
Available at: https://www.iea.org/data-and-statistics/charts/global-share-of-total-energy-supply-by- source-2018" + "text": "vi" }, { "type": "ListItem", diff --git a/unstructured/partition/utils/sorting.py b/unstructured/partition/utils/sorting.py index 62b63acb90..081d60156e 100644 --- a/unstructured/partition/utils/sorting.py +++ b/unstructured/partition/utils/sorting.py @@ -32,7 +32,7 @@ def coordinates_to_bbox(coordinates: CoordinatesMetadata) -> Tuple[int, int, int def shrink_bbox(bbox: Tuple[int, int, int, int], shrink_factor) -> Tuple[int, int, int, int]: """ - Shrink a bounding box by a given shrink factor while maintaining its center. + Shrink a bounding box by a given shrink factor while maintaining its top and left. Parameters: bbox (Tuple[int, int, int, int]): The original bounding box represented by @@ -49,14 +49,12 @@ def shrink_bbox(bbox: Tuple[int, int, int, int], shrink_factor) -> Tuple[int, in height = bottom - top new_width = width * shrink_factor new_height = height * shrink_factor - dw = (width - new_width) / 2 - dh = (height - new_height) / 2 + dw = width - new_width + dh = height - new_height - new_left = left + dw new_right = right - dw - new_top = top + dh new_bottom = bottom - dh - return int(new_left), int(new_top), int(new_right), int(new_bottom) + return int(left), int(top), int(new_right), int(new_bottom) def coord_has_valid_points(coordinates: CoordinatesMetadata) -> bool: