diff --git a/CHANGELOG.md b/CHANGELOG.md index 4d27953a61..ff86e84aa2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,9 @@ -## 0.16.6-dev1 +## 0.16.6-dev2 ### Enhancements - **Every tag is considered to be ontology.Table** Added special handling for tables in HTML partitioning. This change is made to improve the accuracy of table extraction from HTML documents. - **Every HTML has default ontology class assigned** When parsing HTML to ontology each defined HTML in the Ontology has assigned default ontology class. This way it is possible to assign ontology class instead of UncategorizedText when the HTML tag is predicted correctly without class assigned class +- **Use (number of actual table) weighted average for table metrics** In evaluating table metrics the mean aggregation now uses the actual number of tables in a document to weight the metric scores ### Features diff --git a/example-docs/test_evaluate_files/gold_standard_table_structure/2022-financial-statements-p11.pdf.json b/example-docs/test_evaluate_files/gold_standard_table_structure/2022-financial-statements-p11.pdf.json new file mode 100644 index 0000000000..59f619cf33 --- /dev/null +++ b/example-docs/test_evaluate_files/gold_standard_table_structure/2022-financial-statements-p11.pdf.json @@ -0,0 +1,812 @@ +[ + { + "type": "Header", + "text": "I. General Department" + }, + { + "type": "Title", + "text": 1 + }, + { + "type": "Table", + "text": [ + { + "id": "66f5f15d-273f-43c3-9b51-ec6d28637e12", + "x": 0, + "y": 0, + "w": 1, + "h": 1, + "content": "" + }, + { + "id": "34f5f20a-d2d3-48ed-9c3a-416bca0ff517", + "x": 0, + "y": 1, + "w": 1, + "h": 1, + "content": "Assets" + }, + { + "id": "2330a22c-58d5-4c14-8dcc-7463b1b519f3", + "x": 0, + "y": 2, + "w": 1, + "h": 1, + "content": "Usable currencies" + }, + { + "id": "c9e62a61-33da-4cf3-a3f7-50e779e432ae", + "x": 0, + "y": 3, + "w": 1, + "h": 1, + "content": "Other currencies" + }, + { + "id": "9bd02245-2cff-4d72-ac3c-d14bb9f3e240", + "x": 0, + "y": 4, + "w": 1, + "h": 1, + "content": "Credit outstanding" + }, + { + "id": "3756106b-7b23-48d2-ac7d-af19fc25ff92", + "x": 0, + "y": 5, + "w": 1, + "h": 1, + "content": "Total currencies" + }, + { + "id": "eff641b2-b568-4492-9e0f-6af2a33fc107", + "x": 0, + "y": 6, + "w": 1, + "h": 1, + "content": "SDR holdings" + }, + { + "id": "00601fec-6ed4-401c-bf23-40597a6173bd", + "x": 0, + "y": 7, + "w": 1, + "h": 1, + "content": "Investments" + }, + { + "id": "7a057d2c-a8ad-438e-9e10-a49e7194147a", + "x": 0, + "y": 8, + "w": 1, + "h": 1, + "content": "Gold holdings" + }, + { + "id": "d4c05f57-ff6d-4d02-a23a-cfc3fb78c3fc", + "x": 0, + "y": 9, + "w": 1, + "h": 1, + "content": "Property, plant and equipment and intangible assets" + }, + { + "id": "3c99613d-47c7-468c-9745-84fedcddd33c", + "x": 0, + "y": 10, + "w": 1, + "h": 1, + "content": "Net assets under retirement benefit plans" + }, + { + "id": "9d1b1597-cc83-4b14-b19b-911418f6b7c7", + "x": 0, + "y": 11, + "w": 1, + "h": 1, + "content": "Other assets" + }, + { + "id": "fcad018e-53b5-43b9-b2ec-1a25dd38427b", + "x": 0, + "y": 12, + "w": 1, + "h": 1, + "content": "Total assets" + }, + { + "id": "608b3a56-db63-439a-a842-883a8ef3563c", + "x": 0, + "y": 13, + "w": 1, + "h": 1, + "content": "Liabilities" + }, + { + "id": "a98f2cee-0af4-426b-8990-dd2367721b1f", + "x": 0, + "y": 14, + "w": 1, + "h": 1, + "content": "Special Contingent Account" + }, + { + "id": "dac6f09f-8d7f-468c-9c58-e8e9cb472322", + "x": 0, + "y": 15, + "w": 1, + "h": 1, + "content": "Borrowings" + }, + { + "id": "20287888-b7ea-44c0-bd5f-402e32fad446", + "x": 0, + "y": 16, + "w": 1, + "h": 1, + "content": "Quota subscriptions" + }, + { + "id": "5bdec1a0-8cb2-4399-b078-dccecd64cca0", + "x": 0, + "y": 17, + "w": 1, + "h": 1, + "content": "Net liabilities under retirement benefit plans" + }, + { + "id": "6a8839cd-8554-4aad-813f-a51add864538", + "x": 0, + "y": 18, + "w": 1, + "h": 1, + "content": "Other liabilities" + }, + { + "id": "f6c3100d-6b1d-4efa-8bdb-862da646f037", + "x": 0, + "y": 19, + "w": 1, + "h": 1, + "content": "Total liabilities" + }, + { + "id": "cc43bc34-b7bf-47e2-9036-cd51339f21a8", + "x": 0, + "y": 20, + "w": 1, + "h": 1, + "content": "Reserves of the General Resources Account" + }, + { + "id": "b2d8455c-4a8a-46fc-b22b-8f6da9d19237", + "x": 0, + "y": 21, + "w": 1, + "h": 1, + "content": "Retained earnings of the Investment Account" + }, + { + "id": "faf36e7c-34ff-4725-a1e4-7ed5c923d1a4", + "x": 0, + "y": 22, + "w": 1, + "h": 1, + "content": "Resources of the Special Disbursement Account" + }, + { + "id": "e13ca441-7494-4e72-82c7-235147b02530", + "x": 0, + "y": 23, + "w": 1, + "h": 1, + "content": "Total liabilities, reserves, retained earnings, and resources" + }, + { + "id": "1ad7df6d-9f31-4f45-8090-769546dd0a65", + "x": 1, + "y": 0, + "w": 1, + "h": 1, + "content": "Note" + }, + { + "id": "2501d35a-f1b5-457a-97cc-31fc903b835f", + "x": 1, + "y": 1, + "w": 1, + "h": 1, + "content": "" + }, + { + "id": "474f2539-07b1-4fbd-be3c-1e81c80d66a5", + "x": 1, + "y": 2, + "w": 1, + "h": 1, + "content": "" + }, + { + "id": "b712f0ec-4c64-49c3-919b-57b87d612450", + "x": 1, + "y": 3, + "w": 1, + "h": 1, + "content": "" + }, + { + "id": "68fac5df-08fd-44ad-afc2-ea4d83b2a5d4", + "x": 1, + "y": 4, + "w": 1, + "h": 1, + "content": "5" + }, + { + "id": "0c8e5e2a-868e-470d-b95e-b4af1d2b106e", + "x": 1, + "y": 5, + "w": 1, + "h": 1, + "content": "" + }, + { + "id": "b01c4ad4-be06-4e17-b62a-b654dfb703dc", + "x": 1, + "y": 6, + "w": 1, + "h": 1, + "content": "6" + }, + { + "id": "a4d8eaca-b046-4dd8-80af-03fea8e3e22d", + "x": 1, + "y": 7, + "w": 1, + "h": 1, + "content": "7" + }, + { + "id": "aa674388-765b-4380-b902-07b25dc071a3", + "x": 1, + "y": 8, + "w": 1, + "h": 1, + "content": "9" + }, + { + "id": "40524dab-cb00-4b3a-ad1c-e8b084ca2f02", + "x": 1, + "y": 9, + "w": 1, + "h": 1, + "content": "10" + }, + { + "id": "51fd8888-c373-47b0-aee0-8cbb435f4e80", + "x": 1, + "y": 10, + "w": 1, + "h": 1, + "content": "11" + }, + { + "id": "8025c648-d9f2-46e2-b297-b47a8e87be02", + "x": 1, + "y": 11, + "w": 1, + "h": 1, + "content": "12" + }, + { + "id": "913fd95f-50fa-4051-b0cc-f4fda99ca94d", + "x": 1, + "y": 12, + "w": 1, + "h": 1, + "content": "" + }, + { + "id": "17894253-6c15-4bfb-8044-688b48121d6d", + "x": 1, + "y": 13, + "w": 1, + "h": 1, + "content": "" + }, + { + "id": "2985e339-b559-43de-b61e-15e2c44f2261", + "x": 1, + "y": 14, + "w": 1, + "h": 1, + "content": "13" + }, + { + "id": "32573e9c-98de-4fda-a07d-f4a733bc09ca", + "x": 1, + "y": 15, + "w": 1, + "h": 1, + "content": "14" + }, + { + "id": "174f56b1-6579-4dce-bb41-54697ad6a672", + "x": 1, + "y": 16, + "w": 1, + "h": 1, + "content": "15" + }, + { + "id": "aed9448b-5d3a-49d1-98f5-a25b219879e3", + "x": 1, + "y": 17, + "w": 1, + "h": 1, + "content": "11" + }, + { + "id": "79806387-c606-4e3b-a1c7-14d1df1671fb", + "x": 1, + "y": 18, + "w": 1, + "h": 1, + "content": "12" + }, + { + "id": "72307eaf-9cfd-4075-97d9-76dab90c2469", + "x": 1, + "y": 19, + "w": 1, + "h": 1, + "content": "" + }, + { + "id": "772534a0-3ef9-43a2-ab60-2e18dd0859ec", + "x": 1, + "y": 20, + "w": 1, + "h": 1, + "content": "16" + }, + { + "id": "872339e5-8690-4be2-9e96-ce9e7c385eb7", + "x": 1, + "y": 21, + "w": 1, + "h": 1, + "content": "" + }, + { + "id": "f83024d7-8eba-4b72-a1ee-8654a63a4dc8", + "x": 1, + "y": 22, + "w": 1, + "h": 1, + "content": "" + }, + { + "id": "dc0df0e2-1383-4c2c-86e8-3bdfb747969c", + "x": 1, + "y": 23, + "w": 1, + "h": 1, + "content": "" + }, + { + "id": "156eeaae-e606-424b-9918-33e8a4b4edc7", + "x": 2, + "y": 0, + "w": 1, + "h": 1, + "content": "2022" + }, + { + "id": "d8d77e89-470d-4554-9835-e04d7b2dc42c", + "x": 2, + "y": 1, + "w": 1, + "h": 1, + "content": "" + }, + { + "id": "5f2283d0-c3eb-4586-93c0-2da0eee67fff", + "x": 2, + "y": 2, + "w": 1, + "h": 1, + "content": "292,280" + }, + { + "id": "e263efe7-9c83-4422-8760-d48738724b58", + "x": 2, + "y": 3, + "w": 1, + "h": 1, + "content": "69,407" + }, + { + "id": "7c30f9c7-677f-455c-8d64-8588a976306e", + "x": 2, + "y": 4, + "w": 1, + "h": 1, + "content": "93,031" + }, + { + "id": "790d6a30-7dee-4a88-87ab-f906440df5be", + "x": 2, + "y": 5, + "w": 1, + "h": 1, + "content": "454,718" + }, + { + "id": "c6919305-bbae-40b2-aa61-9c30fb737cf3", + "x": 2, + "y": 6, + "w": 1, + "h": 1, + "content": "22,270" + }, + { + "id": "2bbf179e-21c9-4464-a9bf-1a06e7b5f1d5", + "x": 2, + "y": 7, + "w": 1, + "h": 1, + "content": "25,418" + }, + { + "id": "6fd8d460-bc52-4843-a37a-760bc89f90aa", + "x": 2, + "y": 8, + "w": 1, + "h": 1, + "content": "3,167" + }, + { + "id": "f7dc815c-9d78-45b8-9f11-23c7ec5edf94", + "x": 2, + "y": 9, + "w": 1, + "h": 1, + "content": "551" + }, + { + "id": "91737fe0-b342-4a63-a423-9187156396c2", + "x": 2, + "y": 10, + "w": 1, + "h": 1, + "content": "1,375" + }, + { + "id": "336b3b67-3bc2-4df0-b9e0-9bcd3ed8f51f", + "x": 2, + "y": 11, + "w": 1, + "h": 1, + "content": "911" + }, + { + "id": "a91b131d-27b3-4580-8829-5ef74fd4c83b", + "x": 2, + "y": 12, + "w": 1, + "h": 1, + "content": "508,410" + }, + { + "id": "f5412732-1008-4272-aab5-8bcc9c2bbf42", + "x": 2, + "y": 13, + "w": 1, + "h": 1, + "content": "" + }, + { + "id": "7f69417a-5100-4698-98cf-00c19e7c20d9", + "x": 2, + "y": 14, + "w": 1, + "h": 1, + "content": "\u2014" + }, + { + "id": "30b95999-7ab0-4534-aa1a-27a88a72e023", + "x": 2, + "y": 15, + "w": 1, + "h": 1, + "content": "2,615" + }, + { + "id": "cc53c5a2-a8fe-4e94-b4bd-ba630c1da521", + "x": 2, + "y": 16, + "w": 1, + "h": 1, + "content": "476,272" + }, + { + "id": "3b8158b7-70ed-45de-970d-cd774d9df25e", + "x": 2, + "y": 17, + "w": 1, + "h": 1, + "content": "127" + }, + { + "id": "99370fae-c111-4de2-96a9-6cc4298568a8", + "x": 2, + "y": 18, + "w": 1, + "h": 1, + "content": "970" + }, + { + "id": "1a1810ef-2540-4864-903d-17b54946d812", + "x": 2, + "y": 19, + "w": 1, + "h": 1, + "content": "479,984" + }, + { + "id": "2fb39f36-409d-4ffe-b26b-7d02b2658b34", + "x": 2, + "y": 20, + "w": 1, + "h": 1, + "content": "26,524" + }, + { + "id": "068b6e4c-1c7d-4bf9-bd46-4961a93d7828", + "x": 2, + "y": 21, + "w": 1, + "h": 1, + "content": "1,902" + }, + { + "id": "2366f69b-dc1c-4d09-ba51-ebd2967b7bc0", + "x": 2, + "y": 22, + "w": 1, + "h": 1, + "content": "\u2014" + }, + { + "id": "d9babc16-6049-4fb0-83f7-93f5f8caff79", + "x": 2, + "y": 23, + "w": 1, + "h": 1, + "content": "508,410" + }, + { + "id": "c15bffd8-845d-45fe-b06c-2e2f7ed6845a", + "x": 3, + "y": 0, + "w": 1, + "h": 1, + "content": "2021" + }, + { + "id": "635715bd-ef82-4f2f-af3a-bad37448a647", + "x": 3, + "y": 1, + "w": 1, + "h": 1, + "content": "" + }, + { + "id": "27ea8755-e1ae-4e95-a20e-fa4fe6e5bb7e", + "x": 3, + "y": 2, + "w": 1, + "h": 1, + "content": "297,217" + }, + { + "id": "08911b39-a522-4578-84f8-ae91f795e063", + "x": 3, + "y": 3, + "w": 1, + "h": 1, + "content": "71,651" + }, + { + "id": "1857f867-e92d-4a70-85b7-2ca6b9b7d2f8", + "x": 3, + "y": 4, + "w": 1, + "h": 1, + "content": "89,788" + }, + { + "id": "75436437-bec2-47c2-b2c1-a99159f1311e", + "x": 3, + "y": 5, + "w": 1, + "h": 1, + "content": "458,656" + }, + { + "id": "82333684-445e-4f4e-8e1b-aeea61d953c5", + "x": 3, + "y": 6, + "w": 1, + "h": 1, + "content": "22,203" + }, + { + "id": "a8aeacef-99dc-428d-b95c-6ab981bab1cb", + "x": 3, + "y": 7, + "w": 1, + "h": 1, + "content": "23,032" + }, + { + "id": "74410f40-f4c4-4f44-b7e5-9958c8cb8bab", + "x": 3, + "y": 8, + "w": 1, + "h": 1, + "content": "3,167" + }, + { + "id": "390d2fc9-f167-4b7b-b611-adb781cf9003", + "x": 3, + "y": 9, + "w": 1, + "h": 1, + "content": "555" + }, + { + "id": "f2d06cd8-4de0-4c8b-a215-5859d4a22a1f", + "x": 3, + "y": 10, + "w": 1, + "h": 1, + "content": "\u2014" + }, + { + "id": "59414f75-8b58-4c5b-9656-c27605fe8b29", + "x": 3, + "y": 11, + "w": 1, + "h": 1, + "content": "706" + }, + { + "id": "1073551b-fca8-45f4-9a1a-4443fbe5ce6a", + "x": 3, + "y": 12, + "w": 1, + "h": 1, + "content": "508,319" + }, + { + "id": "afe5fcf4-83de-41f3-9c01-9864fd3d104e", + "x": 3, + "y": 13, + "w": 1, + "h": 1, + "content": "" + }, + { + "id": "4d349793-595d-47c2-9d11-613aa78ffdd6", + "x": 3, + "y": 14, + "w": 1, + "h": 1, + "content": "1,066" + }, + { + "id": "f1942864-03aa-43ac-9196-4a4fce689882", + "x": 3, + "y": 15, + "w": 1, + "h": 1, + "content": "5,138" + }, + { + "id": "76733d69-53ff-418f-ad04-397c00a1c4af", + "x": 3, + "y": 16, + "w": 1, + "h": 1, + "content": "475,808" + }, + { + "id": "d3e41ea2-c8ec-44e6-8883-9bd7b0b2eabc", + "x": 3, + "y": 17, + "w": 1, + "h": 1, + "content": "205" + }, + { + "id": "3774efda-bddb-46ac-a172-004b405b9401", + "x": 3, + "y": 18, + "w": 1, + "h": 1, + "content": "761" + }, + { + "id": "c2db0a5e-c83e-4537-84c4-1b6916a053ba", + "x": 3, + "y": 19, + "w": 1, + "h": 1, + "content": "482,978" + }, + { + "id": "20cdfcb8-0691-41fd-97ec-cc1dcbb82695", + "x": 3, + "y": 20, + "w": 1, + "h": 1, + "content": "23,350" + }, + { + "id": "8ca488c3-bc8c-46b7-a742-7d3de4691aef", + "x": 3, + "y": 21, + "w": 1, + "h": 1, + "content": "1,991" + }, + { + "id": "fcae272e-ae3d-487a-b143-dbae95e41c56", + "x": 3, + "y": 22, + "w": 1, + "h": 1, + "content": "\u2014" + }, + { + "id": "b70f8af7-fa14-4ae0-9010-32756d5a6073", + "x": 3, + "y": 23, + "w": 1, + "h": 1, + "content": "508,319" + } + ] + }, + { + "type": "NarrativeText", + "text": "The accompanying notes are an integral part of these financial statements." + }, + { + "type": "NarrativeText", + "text": "These financial statements were signed by the Managing Director and the Director of Finance on June 24, 2022." + }, + { + "type": "Value" + }, + { + "type": "NarrativeText", + "text": "Kristalina Georgieva /s/ Managing Director" + }, + { + "type": "Value" + }, + { + "type": "NarrativeText", + "text": "Bernard Lauwers /s/ Director, Finance Department" + }, + { + "type": "PageNumber", + "text": 7 + }, + { + "type": "Footer" + } +] \ No newline at end of file diff --git a/example-docs/test_evaluate_files/unstructured_output_table_structure/2022-financial-statements-p11.pdf.json b/example-docs/test_evaluate_files/unstructured_output_table_structure/2022-financial-statements-p11.pdf.json new file mode 100644 index 0000000000..4a1cc5c021 --- /dev/null +++ b/example-docs/test_evaluate_files/unstructured_output_table_structure/2022-financial-statements-p11.pdf.json @@ -0,0 +1,2041 @@ +[ + { + "element_id": "65cc604a285e9b4833c3515c205525f1", + "metadata": { + "coordinates": { + "layout_height": 2250, + "layout_width": 1750, + "points": [ + [ + 164.4, + 135.9 + ], + [ + 164.4, + 158.2 + ], + [ + 402.1, + 158.2 + ], + [ + 402.1, + 135.9 + ] + ], + "system": "PixelSpace" + }, + "detection_class_prob": 0.76059, + "file_directory": "/Users/yaoyou/Downloads/mini-holistic-all/src", + "filename": "2022-financial-statements-p11.pdf", + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "last_modified": "2024-01-11T09:49:09", + "page_number": 1 + }, + "text": "I. General Department", + "type": "Header" + }, + { + "element_id": "525f06761160ee0502ed343a27144bf3", + "metadata": { + "coordinates": { + "layout_height": 2250, + "layout_width": 1750, + "points": [ + [ + 165.0, + 281.1 + ], + [ + 165.0, + 353.2 + ], + [ + 1281.2, + 353.2 + ], + [ + 1281.2, + 281.1 + ] + ], + "system": "PixelSpace" + }, + "file_directory": "/Users/yaoyou/Downloads/mini-holistic-all/src", + "filename": "2022-financial-statements-p11.pdf", + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "last_modified": "2024-01-11T09:49:09", + "page_number": 1, + "parent_id": "65cc604a285e9b4833c3515c205525f1" + }, + "text": "Statements of Financial Position at April 30, 2022, and 2021 (in millions of SDRs)", + "type": "UncategorizedText" + }, + { + "element_id": "335f6e936ff6d431fb172bdcb3c45f62", + "metadata": { + "coordinates": { + "layout_height": 2250, + "layout_width": 1750, + "points": [ + [ + 178.3, + 368.5 + ], + [ + 178.3, + 1285.5 + ], + [ + 1565.2, + 1285.5 + ], + [ + 1565.2, + 368.5 + ] + ], + "system": "PixelSpace" + }, + "detection_class_prob": 0.92937, + "file_directory": "/Users/yaoyou/Downloads/mini-holistic-all/src", + "filename": "2022-financial-statements-p11.pdf", + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "last_modified": "2024-01-11T09:49:09", + "page_number": 1, + "parent_id": "65cc604a285e9b4833c3515c205525f1", + "table_as_cells": [ + { + "content": "Assets", + "h": 1, + "w": 1, + "x": 0, + "y": 0 + }, + { + "content": "Usable currencies", + "h": 1, + "w": 1, + "x": 0, + "y": 1 + }, + { + "content": "Other currencies", + "h": 1, + "w": 1, + "x": 0, + "y": 2 + }, + { + "content": "Credit outstanding", + "h": 1, + "w": 1, + "x": 0, + "y": 3 + }, + { + "content": "Total currencies", + "h": 1, + "w": 1, + "x": 0, + "y": 4 + }, + { + "content": "SDR holdings", + "h": 1, + "w": 1, + "x": 0, + "y": 5 + }, + { + "content": "Investments", + "h": 1, + "w": 1, + "x": 0, + "y": 6 + }, + { + "content": "Gold holdings", + "h": 1, + "w": 1, + "x": 0, + "y": 7 + }, + { + "content": "Property, plant and equipment and intangible assets", + "h": 1, + "w": 1, + "x": 0, + "y": 8 + }, + { + "content": "Net assets under retirement benefit plans", + "h": 1, + "w": 1, + "x": 0, + "y": 9 + }, + { + "content": "Other assets", + "h": 1, + "w": 1, + "x": 0, + "y": 10 + }, + { + "content": "Total assets", + "h": 1, + "w": 1, + "x": 0, + "y": 11 + }, + { + "content": "Special Contingent Account", + "h": 1, + "w": 1, + "x": 0, + "y": 13 + }, + { + "content": "Borrowings", + "h": 1, + "w": 1, + "x": 0, + "y": 14 + }, + { + "content": "Quota subscriptions", + "h": 1, + "w": 1, + "x": 0, + "y": 15 + }, + { + "content": "Net liabilities under retirement benefit plans", + "h": 1, + "w": 1, + "x": 0, + "y": 16 + }, + { + "content": "Other liabilities", + "h": 1, + "w": 1, + "x": 0, + "y": 17 + }, + { + "content": "otal li S", + "h": 1, + "w": 1, + "x": 0, + "y": 18 + }, + { + "content": "eserves of the General Resources Account", + "h": 1, + "w": 1, + "x": 0, + "y": 19 + }, + { + "content": "Retained earnings of the Investment Account", + "h": 1, + "w": 1, + "x": 0, + "y": 20 + }, + { + "content": "Resources of the Special Disbursement Account", + "h": 1, + "w": 1, + "x": 0, + "y": 21 + }, + { + "content": "otal lial S, reserves, retained earnings, and resources", + "h": 1, + "w": 1, + "x": 0, + "y": 22 + }, + { + "content": "", + "h": 1, + "w": 1, + "x": 1, + "y": 0 + }, + { + "content": "", + "h": 1, + "w": 1, + "x": 1, + "y": 1 + }, + { + "content": "", + "h": 1, + "w": 1, + "x": 1, + "y": 2 + }, + { + "content": "5", + "h": 1, + "w": 1, + "x": 1, + "y": 3 + }, + { + "content": "", + "h": 1, + "w": 1, + "x": 1, + "y": 4 + }, + { + "content": "6", + "h": 1, + "w": 1, + "x": 1, + "y": 5 + }, + { + "content": "7", + "h": 1, + "w": 1, + "x": 1, + "y": 6 + }, + { + "content": "9", + "h": 1, + "w": 1, + "x": 1, + "y": 7 + }, + { + "content": "10", + "h": 1, + "w": 1, + "x": 1, + "y": 8 + }, + { + "content": "11", + "h": 1, + "w": 1, + "x": 1, + "y": 9 + }, + { + "content": "12", + "h": 1, + "w": 1, + "x": 1, + "y": 10 + }, + { + "content": "", + "h": 1, + "w": 1, + "x": 1, + "y": 11 + }, + { + "content": "13", + "h": 1, + "w": 1, + "x": 1, + "y": 13 + }, + { + "content": "14", + "h": 1, + "w": 1, + "x": 1, + "y": 14 + }, + { + "content": "15", + "h": 1, + "w": 1, + "x": 1, + "y": 15 + }, + { + "content": "11", + "h": 1, + "w": 1, + "x": 1, + "y": 16 + }, + { + "content": "12", + "h": 1, + "w": 1, + "x": 1, + "y": 17 + }, + { + "content": "", + "h": 1, + "w": 1, + "x": 1, + "y": 18 + }, + { + "content": "16", + "h": 1, + "w": 1, + "x": 1, + "y": 19 + }, + { + "content": "", + "h": 1, + "w": 1, + "x": 1, + "y": 20 + }, + { + "content": "", + "h": 1, + "w": 1, + "x": 1, + "y": 21 + }, + { + "content": "", + "h": 1, + "w": 1, + "x": 1, + "y": 22 + }, + { + "content": "", + "h": 1, + "w": 1, + "x": 2, + "y": 0 + }, + { + "content": "292,280", + "h": 1, + "w": 1, + "x": 2, + "y": 1 + }, + { + "content": "69,407", + "h": 1, + "w": 1, + "x": 2, + "y": 2 + }, + { + "content": "93,031", + "h": 1, + "w": 1, + "x": 2, + "y": 3 + }, + { + "content": "454,718", + "h": 1, + "w": 1, + "x": 2, + "y": 4 + }, + { + "content": "22,270", + "h": 1, + "w": 1, + "x": 2, + "y": 5 + }, + { + "content": "25,418", + "h": 1, + "w": 1, + "x": 2, + "y": 6 + }, + { + "content": "3,167", + "h": 1, + "w": 1, + "x": 2, + "y": 7 + }, + { + "content": "551", + "h": 1, + "w": 1, + "x": 2, + "y": 8 + }, + { + "content": "1,375", + "h": 1, + "w": 1, + "x": 2, + "y": 9 + }, + { + "content": "911", + "h": 1, + "w": 1, + "x": 2, + "y": 10 + }, + { + "content": "508,410", + "h": 1, + "w": 1, + "x": 2, + "y": 11 + }, + { + "content": "\u2014", + "h": 1, + "w": 1, + "x": 2, + "y": 13 + }, + { + "content": "2,615", + "h": 1, + "w": 1, + "x": 2, + "y": 14 + }, + { + "content": "476,272", + "h": 1, + "w": 1, + "x": 2, + "y": 15 + }, + { + "content": "127", + "h": 1, + "w": 1, + "x": 2, + "y": 16 + }, + { + "content": "970", + "h": 1, + "w": 1, + "x": 2, + "y": 17 + }, + { + "content": "479,984", + "h": 1, + "w": 1, + "x": 2, + "y": 18 + }, + { + "content": "26,524", + "h": 1, + "w": 1, + "x": 2, + "y": 19 + }, + { + "content": "1,902", + "h": 1, + "w": 1, + "x": 2, + "y": 20 + }, + { + "content": "\u2014", + "h": 1, + "w": 1, + "x": 2, + "y": 21 + }, + { + "content": "508,410", + "h": 1, + "w": 1, + "x": 2, + "y": 22 + }, + { + "content": "", + "h": 1, + "w": 1, + "x": 3, + "y": 0 + }, + { + "content": "297,217", + "h": 1, + "w": 1, + "x": 3, + "y": 1 + }, + { + "content": "71,651", + "h": 1, + "w": 1, + "x": 3, + "y": 2 + }, + { + "content": "89,788", + "h": 1, + "w": 1, + "x": 3, + "y": 3 + }, + { + "content": "458,656", + "h": 1, + "w": 1, + "x": 3, + "y": 4 + }, + { + "content": "22,203", + "h": 1, + "w": 1, + "x": 3, + "y": 5 + }, + { + "content": "23,032", + "h": 1, + "w": 1, + "x": 3, + "y": 6 + }, + { + "content": "3,167", + "h": 1, + "w": 1, + "x": 3, + "y": 7 + }, + { + "content": "555", + "h": 1, + "w": 1, + "x": 3, + "y": 8 + }, + { + "content": "_\u2014", + "h": 1, + "w": 1, + "x": 3, + "y": 9 + }, + { + "content": "706", + "h": 1, + "w": 1, + "x": 3, + "y": 10 + }, + { + "content": "508,319", + "h": 1, + "w": 1, + "x": 3, + "y": 11 + }, + { + "content": "1,066", + "h": 1, + "w": 1, + "x": 3, + "y": 13 + }, + { + "content": "5,138", + "h": 1, + "w": 1, + "x": 3, + "y": 14 + }, + { + "content": "475,808", + "h": 1, + "w": 1, + "x": 3, + "y": 15 + }, + { + "content": "205", + "h": 1, + "w": 1, + "x": 3, + "y": 16 + }, + { + "content": "761", + "h": 1, + "w": 1, + "x": 3, + "y": 17 + }, + { + "content": "482,978", + "h": 1, + "w": 1, + "x": 3, + "y": 18 + }, + { + "content": "23,350", + "h": 1, + "w": 1, + "x": 3, + "y": 19 + }, + { + "content": "1,991", + "h": 1, + "w": 1, + "x": 3, + "y": 20 + }, + { + "content": "\u2014", + "h": 1, + "w": 1, + "x": 3, + "y": 21 + }, + { + "content": "508,319", + "h": 1, + "w": 1, + "x": 3, + "y": 22 + }, + { + "content": "-iabilities", + "h": 1, + "w": 4, + "x": 0, + "y": 12 + } + ], + "text_as_html": "
Assets
Usable currencies292,280297,217
Other currencies69,40771,651
Credit outstanding593,03189,788
Total currencies454,718458,656
SDR holdings622,27022,203
Investments725,41823,032
Gold holdings93,1673,167
Property, plant and equipment and intangible assets10551555
Net assets under retirement benefit plans111,375_\u2014
Other assets12911706
Total assets508,410508,319
-iabilities
Special Contingent Account13\u20141,066
Borrowings142,6155,138
Quota subscriptions15476,272475,808
Net liabilities under retirement benefit plans11127205
Other liabilities12970761
otal li S479,984482,978
eserves of the General Resources Account1626,52423,350
Retained earnings of the Investment Account1,9021,991
Resources of the Special Disbursement Account\u2014\u2014
otal lial S, reserves, retained earnings, and resources508,410508,319
" + }, + "text": "Note 2022 2021 Usable currencies 292,280 297,217 Other currencies 69,407 71,651 Credit outstanding 5 93,031 89,788 Total currencies 454,718 458,656 SDR holdings 6 22,270 22,203 Investments 7 25,418 23,032 Gold holdings 9 3,167 3,167 Property, plant and equipment and intangible assets 10 551 555 Net assets under retirement benefit plans 11 1,375 \u2014 Other assets 12 911 706 508,410 508,319 Special Contingent Account 13 \u2014 1,066 Borrowings 14 2,615 5,138 Quota subscriptions 15 476,272 475,808 Net liabilities under retirement benefit plans 11 127 205 Other liabilities 12 970 761 479,984 482,978 16 26,524 23,350 1,902 1,991 \u2014 \u2014 Total liabilities, reserves, retained earnings, and resources 508,410 508,319", + "type": "Table" + }, + { + "element_id": "83333e79d07d5369c1ea81ac84fece49", + "metadata": { + "coordinates": { + "layout_height": 2250, + "layout_width": 1750, + "points": [ + [ + 173.0, + 427.6 + ], + [ + 173.0, + 448.4 + ], + [ + 247.1, + 448.4 + ], + [ + 247.1, + 427.6 + ] + ], + "system": "PixelSpace" + }, + "file_directory": "/Users/yaoyou/Downloads/mini-holistic-all/src", + "filename": "2022-financial-statements-p11.pdf", + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "last_modified": "2024-01-11T09:49:09", + "page_number": 1, + "parent_id": "65cc604a285e9b4833c3515c205525f1" + }, + "text": "Assets", + "type": "Title" + }, + { + "element_id": "477f9805ddecc50afee64c176c77214d", + "metadata": { + "coordinates": { + "layout_height": 2250, + "layout_width": 1750, + "points": [ + [ + 163.0, + 453.3 + ], + [ + 163.0, + 458.7 + ], + [ + 1581.8, + 458.7 + ], + [ + 1581.8, + 453.3 + ] + ], + "system": "PixelSpace" + }, + "file_directory": "/Users/yaoyou/Downloads/mini-holistic-all/src", + "filename": "2022-financial-statements-p11.pdf", + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "last_modified": "2024-01-11T09:49:09", + "page_number": 1 + }, + "text": "", + "type": "Image" + }, + { + "element_id": "ae3bfb88b2bce7580b08ba6cf81d2793", + "metadata": { + "coordinates": { + "layout_height": 2250, + "layout_width": 1750, + "points": [ + [ + 163.0, + 490.7 + ], + [ + 163.0, + 496.0 + ], + [ + 1581.8, + 496.0 + ], + [ + 1581.8, + 490.7 + ] + ], + "system": "PixelSpace" + }, + "file_directory": "/Users/yaoyou/Downloads/mini-holistic-all/src", + "filename": "2022-financial-statements-p11.pdf", + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "last_modified": "2024-01-11T09:49:09", + "page_number": 1 + }, + "text": "", + "type": "Image" + }, + { + "element_id": "40d26315a107679ee6ba3a24e6ab3671", + "metadata": { + "coordinates": { + "layout_height": 2250, + "layout_width": 1750, + "points": [ + [ + 163.0, + 528.0 + ], + [ + 163.0, + 533.3 + ], + [ + 1581.8, + 533.3 + ], + [ + 1581.8, + 528.0 + ] + ], + "system": "PixelSpace" + }, + "file_directory": "/Users/yaoyou/Downloads/mini-holistic-all/src", + "filename": "2022-financial-statements-p11.pdf", + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "last_modified": "2024-01-11T09:49:09", + "page_number": 1 + }, + "text": "", + "type": "Image" + }, + { + "element_id": "7c661779574aaac5f64a3896af6927d7", + "metadata": { + "coordinates": { + "layout_height": 2250, + "layout_width": 1750, + "points": [ + [ + 163.0, + 565.3 + ], + [ + 163.0, + 570.7 + ], + [ + 1581.8, + 570.7 + ], + [ + 1581.8, + 565.3 + ] + ], + "system": "PixelSpace" + }, + "file_directory": "/Users/yaoyou/Downloads/mini-holistic-all/src", + "filename": "2022-financial-statements-p11.pdf", + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "last_modified": "2024-01-11T09:49:09", + "page_number": 1 + }, + "text": "", + "type": "Image" + }, + { + "element_id": "3547fc946bf088d451ca52fbb2487cac", + "metadata": { + "coordinates": { + "layout_height": 2250, + "layout_width": 1750, + "points": [ + [ + 163.0, + 602.7 + ], + [ + 163.0, + 608.0 + ], + [ + 1581.8, + 608.0 + ], + [ + 1581.8, + 602.7 + ] + ], + "system": "PixelSpace" + }, + "file_directory": "/Users/yaoyou/Downloads/mini-holistic-all/src", + "filename": "2022-financial-statements-p11.pdf", + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "last_modified": "2024-01-11T09:49:09", + "page_number": 1 + }, + "text": "", + "type": "Image" + }, + { + "element_id": "df6263781f356939f404b5174102d255", + "metadata": { + "coordinates": { + "layout_height": 2250, + "layout_width": 1750, + "points": [ + [ + 163.0, + 640.2 + ], + [ + 163.0, + 645.5 + ], + [ + 1581.8, + 645.5 + ], + [ + 1581.8, + 640.2 + ] + ], + "system": "PixelSpace" + }, + "file_directory": "/Users/yaoyou/Downloads/mini-holistic-all/src", + "filename": "2022-financial-statements-p11.pdf", + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "last_modified": "2024-01-11T09:49:09", + "page_number": 1 + }, + "text": "", + "type": "Image" + }, + { + "element_id": "dd09db523d1582839b2de71a9a5e8b12", + "metadata": { + "coordinates": { + "layout_height": 2250, + "layout_width": 1750, + "points": [ + [ + 163.0, + 677.5 + ], + [ + 163.0, + 682.8 + ], + [ + 1581.8, + 682.8 + ], + [ + 1581.8, + 677.5 + ] + ], + "system": "PixelSpace" + }, + "file_directory": "/Users/yaoyou/Downloads/mini-holistic-all/src", + "filename": "2022-financial-statements-p11.pdf", + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "last_modified": "2024-01-11T09:49:09", + "page_number": 1 + }, + "text": "", + "type": "Image" + }, + { + "element_id": "8f1bb5f226d788b3e90b8f80020bffd2", + "metadata": { + "coordinates": { + "layout_height": 2250, + "layout_width": 1750, + "points": [ + [ + 163.0, + 714.8 + ], + [ + 163.0, + 720.2 + ], + [ + 1581.8, + 720.2 + ], + [ + 1581.8, + 714.8 + ] + ], + "system": "PixelSpace" + }, + "file_directory": "/Users/yaoyou/Downloads/mini-holistic-all/src", + "filename": "2022-financial-statements-p11.pdf", + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "last_modified": "2024-01-11T09:49:09", + "page_number": 1 + }, + "text": "", + "type": "Image" + }, + { + "element_id": "764d462609915647d3759b1d83f274c5", + "metadata": { + "coordinates": { + "layout_height": 2250, + "layout_width": 1750, + "points": [ + [ + 163.0, + 752.2 + ], + [ + 163.0, + 757.5 + ], + [ + 1581.8, + 757.5 + ], + [ + 1581.8, + 752.2 + ] + ], + "system": "PixelSpace" + }, + "file_directory": "/Users/yaoyou/Downloads/mini-holistic-all/src", + "filename": "2022-financial-statements-p11.pdf", + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "last_modified": "2024-01-11T09:49:09", + "page_number": 1 + }, + "text": "", + "type": "Image" + }, + { + "element_id": "4a56f843b424a3fa37380213b09caa70", + "metadata": { + "coordinates": { + "layout_height": 2250, + "layout_width": 1750, + "points": [ + [ + 163.0, + 789.5 + ], + [ + 163.0, + 794.8 + ], + [ + 1581.8, + 794.8 + ], + [ + 1581.8, + 789.5 + ] + ], + "system": "PixelSpace" + }, + "file_directory": "/Users/yaoyou/Downloads/mini-holistic-all/src", + "filename": "2022-financial-statements-p11.pdf", + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "last_modified": "2024-01-11T09:49:09", + "page_number": 1 + }, + "text": "", + "type": "Image" + }, + { + "element_id": "4ab86a74a79c6de7444538a082b6bb1a", + "metadata": { + "coordinates": { + "layout_height": 2250, + "layout_width": 1750, + "points": [ + [ + 163.0, + 826.8 + ], + [ + 163.0, + 832.2 + ], + [ + 1052.3, + 832.2 + ], + [ + 1052.3, + 826.8 + ] + ], + "system": "PixelSpace" + }, + "file_directory": "/Users/yaoyou/Downloads/mini-holistic-all/src", + "filename": "2022-financial-statements-p11.pdf", + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "last_modified": "2024-01-11T09:49:09", + "page_number": 1 + }, + "text": "", + "type": "Image" + }, + { + "element_id": "871c8a1cf2b1e6c6d00c0b1ba1ee40c6", + "metadata": { + "coordinates": { + "layout_height": 2250, + "layout_width": 1750, + "points": [ + [ + 1438.5, + 827.5 + ], + [ + 1438.5, + 831.5 + ], + [ + 1581.3, + 831.5 + ], + [ + 1581.3, + 827.5 + ] + ], + "system": "PixelSpace" + }, + "file_directory": "/Users/yaoyou/Downloads/mini-holistic-all/src", + "filename": "2022-financial-statements-p11.pdf", + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "last_modified": "2024-01-11T09:49:09", + "page_number": 1 + }, + "text": "", + "type": "Image" + }, + { + "element_id": "6a23a5778b61a46218258943739d73ae", + "metadata": { + "coordinates": { + "layout_height": 2250, + "layout_width": 1750, + "points": [ + [ + 173.0, + 838.6 + ], + [ + 173.0, + 859.4 + ], + [ + 299.3, + 859.4 + ], + [ + 299.3, + 838.6 + ] + ], + "system": "PixelSpace" + }, + "file_directory": "/Users/yaoyou/Downloads/mini-holistic-all/src", + "filename": "2022-financial-statements-p11.pdf", + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "last_modified": "2024-01-11T09:49:09", + "page_number": 1 + }, + "text": "Total assets", + "type": "Title" + }, + { + "element_id": "0caa2844f3a44f1eb9cf1ec9c5b47a2a", + "metadata": { + "coordinates": { + "layout_height": 2250, + "layout_width": 1750, + "points": [ + [ + 173.0, + 881.9 + ], + [ + 173.0, + 902.7 + ], + [ + 275.0, + 902.7 + ], + [ + 275.0, + 881.9 + ] + ], + "system": "PixelSpace" + }, + "file_directory": "/Users/yaoyou/Downloads/mini-holistic-all/src", + "filename": "2022-financial-statements-p11.pdf", + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "last_modified": "2024-01-11T09:49:09", + "page_number": 1 + }, + "text": "Liabilities", + "type": "Title" + }, + { + "element_id": "0e820672ba8ab9c9a6794b5a5d6d3f77", + "metadata": { + "coordinates": { + "layout_height": 2250, + "layout_width": 1750, + "points": [ + [ + 163.0, + 906.2 + ], + [ + 163.0, + 911.5 + ], + [ + 1581.8, + 911.5 + ], + [ + 1581.8, + 906.2 + ] + ], + "system": "PixelSpace" + }, + "file_directory": "/Users/yaoyou/Downloads/mini-holistic-all/src", + "filename": "2022-financial-statements-p11.pdf", + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "last_modified": "2024-01-11T09:49:09", + "page_number": 1 + }, + "text": "", + "type": "Image" + }, + { + "element_id": "f9b633213fd5d561ca3cf1c1a87b05fb", + "metadata": { + "coordinates": { + "layout_height": 2250, + "layout_width": 1750, + "points": [ + [ + 163.0, + 943.7 + ], + [ + 163.0, + 949.0 + ], + [ + 1581.8, + 949.0 + ], + [ + 1581.8, + 943.7 + ] + ], + "system": "PixelSpace" + }, + "file_directory": "/Users/yaoyou/Downloads/mini-holistic-all/src", + "filename": "2022-financial-statements-p11.pdf", + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "last_modified": "2024-01-11T09:49:09", + "page_number": 1 + }, + "text": "", + "type": "Image" + }, + { + "element_id": "522c433f09c8953d1f32c0fdc2de5784", + "metadata": { + "coordinates": { + "layout_height": 2250, + "layout_width": 1750, + "points": [ + [ + 163.0, + 981.0 + ], + [ + 163.0, + 986.3 + ], + [ + 1581.8, + 986.3 + ], + [ + 1581.8, + 981.0 + ] + ], + "system": "PixelSpace" + }, + "file_directory": "/Users/yaoyou/Downloads/mini-holistic-all/src", + "filename": "2022-financial-statements-p11.pdf", + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "last_modified": "2024-01-11T09:49:09", + "page_number": 1 + }, + "text": "", + "type": "Image" + }, + { + "element_id": "5ed918dcb33852f4f4594fc7d0f24f67", + "metadata": { + "coordinates": { + "layout_height": 2250, + "layout_width": 1750, + "points": [ + [ + 163.0, + 1018.3 + ], + [ + 163.0, + 1023.7 + ], + [ + 1581.8, + 1023.7 + ], + [ + 1581.8, + 1018.3 + ] + ], + "system": "PixelSpace" + }, + "file_directory": "/Users/yaoyou/Downloads/mini-holistic-all/src", + "filename": "2022-financial-statements-p11.pdf", + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "last_modified": "2024-01-11T09:49:09", + "page_number": 1 + }, + "text": "", + "type": "Image" + }, + { + "element_id": "771cbe1b8698a307a53dcb7aa6d9adae", + "metadata": { + "coordinates": { + "layout_height": 2250, + "layout_width": 1750, + "points": [ + [ + 163.0, + 1055.7 + ], + [ + 163.0, + 1061.0 + ], + [ + 1581.8, + 1061.0 + ], + [ + 1581.8, + 1055.7 + ] + ], + "system": "PixelSpace" + }, + "file_directory": "/Users/yaoyou/Downloads/mini-holistic-all/src", + "filename": "2022-financial-statements-p11.pdf", + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "last_modified": "2024-01-11T09:49:09", + "page_number": 1 + }, + "text": "", + "type": "Image" + }, + { + "element_id": "abd2d4f5b0f381be46e69dbee1350f68", + "metadata": { + "coordinates": { + "layout_height": 2250, + "layout_width": 1750, + "points": [ + [ + 163.0, + 1093.0 + ], + [ + 163.0, + 1098.3 + ], + [ + 1052.3, + 1098.3 + ], + [ + 1052.3, + 1093.0 + ] + ], + "system": "PixelSpace" + }, + "file_directory": "/Users/yaoyou/Downloads/mini-holistic-all/src", + "filename": "2022-financial-statements-p11.pdf", + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "last_modified": "2024-01-11T09:49:09", + "page_number": 1 + }, + "text": "", + "type": "Image" + }, + { + "element_id": "05d62f9687aafdf4e682faf86157d1f5", + "metadata": { + "coordinates": { + "layout_height": 2250, + "layout_width": 1750, + "points": [ + [ + 1438.5, + 1093.7 + ], + [ + 1438.5, + 1097.7 + ], + [ + 1581.3, + 1097.7 + ], + [ + 1581.3, + 1093.7 + ] + ], + "system": "PixelSpace" + }, + "file_directory": "/Users/yaoyou/Downloads/mini-holistic-all/src", + "filename": "2022-financial-statements-p11.pdf", + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "last_modified": "2024-01-11T09:49:09", + "page_number": 1 + }, + "text": "", + "type": "Image" + }, + { + "element_id": "64bc16b4cf986ed144be6d53e5ac952b", + "metadata": { + "coordinates": { + "layout_height": 2250, + "layout_width": 1750, + "points": [ + [ + 173.0, + 1104.7 + ], + [ + 173.0, + 1125.6 + ], + [ + 323.5, + 1125.6 + ], + [ + 323.5, + 1104.7 + ] + ], + "system": "PixelSpace" + }, + "file_directory": "/Users/yaoyou/Downloads/mini-holistic-all/src", + "filename": "2022-financial-statements-p11.pdf", + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "last_modified": "2024-01-11T09:49:09", + "page_number": 1 + }, + "text": "Total liabilities", + "type": "Title" + }, + { + "element_id": "1e06119c828c6c4389f59b56ea395d34", + "metadata": { + "coordinates": { + "layout_height": 2250, + "layout_width": 1750, + "points": [ + [ + 173.0, + 1142.1 + ], + [ + 173.0, + 1162.9 + ], + [ + 590.9, + 1162.9 + ], + [ + 590.9, + 1142.1 + ] + ], + "system": "PixelSpace" + }, + "file_directory": "/Users/yaoyou/Downloads/mini-holistic-all/src", + "filename": "2022-financial-statements-p11.pdf", + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "last_modified": "2024-01-11T09:49:09", + "page_number": 1 + }, + "text": "Reserves of the General Resources Account", + "type": "Title" + }, + { + "element_id": "7dd7d1fd11bc18b1e374549b576d1f29", + "metadata": { + "coordinates": { + "layout_height": 2250, + "layout_width": 1750, + "points": [ + [ + 163.0, + 1167.8 + ], + [ + 163.0, + 1173.2 + ], + [ + 1581.8, + 1173.2 + ], + [ + 1581.8, + 1167.8 + ] + ], + "system": "PixelSpace" + }, + "file_directory": "/Users/yaoyou/Downloads/mini-holistic-all/src", + "filename": "2022-financial-statements-p11.pdf", + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "last_modified": "2024-01-11T09:49:09", + "page_number": 1 + }, + "text": "", + "type": "Image" + }, + { + "element_id": "26fab233fef39936e212028c23d5e453", + "metadata": { + "coordinates": { + "layout_height": 2250, + "layout_width": 1750, + "points": [ + [ + 173.0, + 1179.6 + ], + [ + 173.0, + 1200.4 + ], + [ + 594.5, + 1200.4 + ], + [ + 594.5, + 1179.6 + ] + ], + "system": "PixelSpace" + }, + "file_directory": "/Users/yaoyou/Downloads/mini-holistic-all/src", + "filename": "2022-financial-statements-p11.pdf", + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "last_modified": "2024-01-11T09:49:09", + "page_number": 1 + }, + "text": "Retained earnings of the Investment Account", + "type": "NarrativeText" + }, + { + "element_id": "d557775a955ec1af6de2e52f473d8419", + "metadata": { + "coordinates": { + "layout_height": 2250, + "layout_width": 1750, + "points": [ + [ + 163.0, + 1205.2 + ], + [ + 163.0, + 1210.5 + ], + [ + 1581.8, + 1210.5 + ], + [ + 1581.8, + 1205.2 + ] + ], + "system": "PixelSpace" + }, + "file_directory": "/Users/yaoyou/Downloads/mini-holistic-all/src", + "filename": "2022-financial-statements-p11.pdf", + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "last_modified": "2024-01-11T09:49:09", + "page_number": 1 + }, + "text": "", + "type": "Image" + }, + { + "element_id": "f86bcd3d03727122b197d75c4e73720a", + "metadata": { + "coordinates": { + "layout_height": 2250, + "layout_width": 1750, + "points": [ + [ + 173.0, + 1216.9 + ], + [ + 173.0, + 1237.7 + ], + [ + 625.6, + 1237.7 + ], + [ + 625.6, + 1216.9 + ] + ], + "system": "PixelSpace" + }, + "file_directory": "/Users/yaoyou/Downloads/mini-holistic-all/src", + "filename": "2022-financial-statements-p11.pdf", + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "last_modified": "2024-01-11T09:49:09", + "page_number": 1 + }, + "text": "Resources of the Special Disbursement Account", + "type": "Title" + }, + { + "element_id": "7365c6ae9bf82f3638fd89674a017674", + "metadata": { + "coordinates": { + "layout_height": 2250, + "layout_width": 1750, + "points": [ + [ + 163.0, + 1242.5 + ], + [ + 163.0, + 1247.8 + ], + [ + 1052.3, + 1247.8 + ], + [ + 1052.3, + 1242.5 + ] + ], + "system": "PixelSpace" + }, + "file_directory": "/Users/yaoyou/Downloads/mini-holistic-all/src", + "filename": "2022-financial-statements-p11.pdf", + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "last_modified": "2024-01-11T09:49:09", + "page_number": 1 + }, + "text": "", + "type": "Image" + }, + { + "element_id": "6b982880d0aa2a409eebd8bdad4c5ee7", + "metadata": { + "coordinates": { + "layout_height": 2250, + "layout_width": 1750, + "points": [ + [ + 1438.5, + 1243.2 + ], + [ + 1438.5, + 1247.2 + ], + [ + 1581.3, + 1247.2 + ], + [ + 1581.3, + 1243.2 + ] + ], + "system": "PixelSpace" + }, + "file_directory": "/Users/yaoyou/Downloads/mini-holistic-all/src", + "filename": "2022-financial-statements-p11.pdf", + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "last_modified": "2024-01-11T09:49:09", + "page_number": 1 + }, + "text": "", + "type": "Image" + }, + { + "element_id": "a61b273b6924ef539ab19372ec8f9d29", + "metadata": { + "coordinates": { + "layout_height": 2250, + "layout_width": 1750, + "points": [ + [ + 165.0, + 1300.0 + ], + [ + 165.0, + 1339.8 + ], + [ + 1071.4, + 1339.8 + ], + [ + 1071.4, + 1300.0 + ] + ], + "system": "PixelSpace" + }, + "file_directory": "/Users/yaoyou/Downloads/mini-holistic-all/src", + "filename": "2022-financial-statements-p11.pdf", + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "last_modified": "2024-01-11T09:49:09", + "page_number": 1 + }, + "text": "The accompanying notes are an integral part of these financial statements. These financial statements were signed by the Managing Director and the Director of Finance on June 24, 2022.", + "type": "NarrativeText" + }, + { + "element_id": "34420b5b7fa4d3125118bc071d5762e3", + "metadata": { + "coordinates": { + "layout_height": 2250, + "layout_width": 1750, + "points": [ + [ + 172.1, + 1607.0 + ], + [ + 172.1, + 1658.9 + ], + [ + 415.9, + 1658.9 + ], + [ + 415.9, + 1607.0 + ] + ], + "system": "PixelSpace" + }, + "detection_class_prob": 0.66378, + "file_directory": "/Users/yaoyou/Downloads/mini-holistic-all/src", + "filename": "2022-financial-statements-p11.pdf", + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "last_modified": "2024-01-11T09:49:09", + "page_number": 1 + }, + "text": "Kristalina Georgieva /s/ Managing Director", + "type": "NarrativeText" + }, + { + "element_id": "73c1f3e3b4e289ab4fac59e1c7efb58d", + "metadata": { + "coordinates": { + "layout_height": 2250, + "layout_width": 1750, + "points": [ + [ + 992.5, + 1607.0 + ], + [ + 992.5, + 1659.1 + ], + [ + 1294.8, + 1659.1 + ], + [ + 1294.8, + 1607.0 + ] + ], + "system": "PixelSpace" + }, + "detection_class_prob": 0.72551, + "file_directory": "/Users/yaoyou/Downloads/mini-holistic-all/src", + "filename": "2022-financial-statements-p11.pdf", + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "last_modified": "2024-01-11T09:49:09", + "page_number": 1 + }, + "text": "Bernard Lauwers /s/ Director, Finance Department", + "type": "NarrativeText" + }, + { + "element_id": "545721361a36a5a11ede506e3d442ccc", + "metadata": { + "coordinates": { + "layout_height": 2250, + "layout_width": 1750, + "points": [ + [ + 1572.7, + 2149.0 + ], + [ + 1572.7, + 2171.2 + ], + [ + 1591.2, + 2171.2 + ], + [ + 1591.2, + 2149.0 + ] + ], + "system": "PixelSpace" + }, + "file_directory": "/Users/yaoyou/Downloads/mini-holistic-all/src", + "filename": "2022-financial-statements-p11.pdf", + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "last_modified": "2024-01-11T09:49:09", + "page_number": 1 + }, + "text": "7", + "type": "UncategorizedText" + } +] \ No newline at end of file diff --git a/test_unstructured/metrics/test_evaluate.py b/test_unstructured/metrics/test_evaluate.py index dae5dfa326..d5d70775d1 100644 --- a/test_unstructured/metrics/test_evaluate.py +++ b/test_unstructured/metrics/test_evaluate.py @@ -115,7 +115,7 @@ def test_text_extraction_evaluation(): UNSTRUCTURED_TABLE_STRUCTURE_DIRNAME, GOLD_TABLE_STRUCTURE_DIRNAME, Path("IRS-2023-Form-1095-A.pdf.json"), - 13, + 14, {}, ), ( @@ -190,9 +190,16 @@ def test_table_structure_evaluation(): assert os.path.isfile(os.path.join(export_dir, "all-docs-table-structure-accuracy.tsv")) assert os.path.isfile(os.path.join(export_dir, "aggregate-table-structure-accuracy.tsv")) df = pd.read_csv(os.path.join(export_dir, "all-docs-table-structure-accuracy.tsv"), sep="\t") - assert len(df) == 1 - assert len(df.columns) == 13 - assert df.iloc[0].filename == "IRS-2023-Form-1095-A.pdf" + agg_df = pd.read_csv( + os.path.join(export_dir, "aggregate-table-structure-accuracy.tsv"), sep="\t" + ).set_index("metric") + assert len(df) == 2 + assert len(df.columns) == 15 + assert df.iloc[1].filename == "IRS-2023-Form-1095-A.pdf" + assert ( + np.round(np.average(df["table_level_acc"], weights=df["total_tables"]), 3) + == agg_df.loc["table_level_acc", "average"] + ) @pytest.mark.skipif(is_in_docker, reason="Skipping this test in Docker container") diff --git a/unstructured/__version__.py b/unstructured/__version__.py index 209cc04b50..1fb1ed848b 100644 --- a/unstructured/__version__.py +++ b/unstructured/__version__.py @@ -1 +1 @@ -__version__ = "0.16.6-dev1" # pragma: no cover +__version__ = "0.16.6-dev2" # pragma: no cover diff --git a/unstructured/metrics/evaluate.py b/unstructured/metrics/evaluate.py index 05eab09c8e..1ff498579a 100755 --- a/unstructured/metrics/evaluate.py +++ b/unstructured/metrics/evaluate.py @@ -12,6 +12,7 @@ from pathlib import Path from typing import List, Optional, Union +import numpy as np import pandas as pd from tqdm import tqdm @@ -50,6 +51,13 @@ logger.setLevel(logging.DEBUG) AGG_HEADERS = ["metric", "average", "sample_sd", "population_sd", "count"] +AGG_HEADERS_MAPPING = { + "index": "metric", + "_mean": "average", + "_stdev": "sample_sd", + "_pstdev": "population_sd", + "_count": "count", +} OUTPUT_TYPE_OPTIONS = ["json", "txt"] @@ -266,6 +274,7 @@ def _process_document(self, doc: Path) -> Optional[list]: out_filename, doctype, connector, + report_from_html.total_predicted_tables, ] + [getattr(report_from_html, metric) for metric in self.supported_metric_names] def _generate_dataframes(self, rows): @@ -273,10 +282,15 @@ def _generate_dataframes(self, rows): "filename", "doctype", "connector", + "total_predicted_tables", ] + self.supported_metric_names df = pd.DataFrame(rows, columns=headers) - has_tables_df = df[df["total_tables"] > 0] + df["_table_weights"] = df["total_tables"] + # we give false positive tables a 1 table worth of weight in computing table level acc + df["_table_weights"][df.total_tables.eq(0) & df.total_predicted_tables.gt(0)] = 1 + # filter down to only those with actual and/or predicted tables + has_tables_df = df[df["_table_weights"] > 0] if has_tables_df.empty: agg_df = pd.DataFrame( @@ -286,7 +300,21 @@ def _generate_dataframes(self, rows): element_metrics_results = {} for metric in self.supported_metric_names: metric_df = has_tables_df[has_tables_df[metric].notnull()] - agg_metric = metric_df[metric].agg([_mean, _stdev, _pstdev, _count]).transpose() + agg_metric = metric_df[metric].agg([_stdev, _pstdev, _count]).transpose() + if metric.startswith("total_tables"): + agg_metric["_mean"] = metric_df[metric].mean() + elif metric.startswith("table_level_acc"): + agg_metric["_mean"] = np.round( + np.average(metric_df[metric], weights=metric_df["_table_weights"]), + 3, + ) + else: + # false positive tables do not contribute to table structure and content + # extraction metrics + agg_metric["_mean"] = np.round( + np.average(metric_df[metric], weights=metric_df["total_tables"]), + 3, + ) if agg_metric.empty: element_metrics_results[metric] = pd.Series( data=[None, None, None, 0], index=["_mean", "_stdev", "_pstdev", "_count"] @@ -294,7 +322,7 @@ def _generate_dataframes(self, rows): else: element_metrics_results[metric] = agg_metric agg_df = pd.DataFrame(element_metrics_results).transpose().reset_index() - agg_df.columns = AGG_HEADERS + agg_df = agg_df.rename(columns=AGG_HEADERS_MAPPING) return df, agg_df diff --git a/unstructured/metrics/table/table_eval.py b/unstructured/metrics/table/table_eval.py index 375c546efa..7ec2b7b016 100644 --- a/unstructured/metrics/table/table_eval.py +++ b/unstructured/metrics/table/table_eval.py @@ -41,6 +41,7 @@ class TableEvaluation: """Class representing a gathered table metrics.""" total_tables: int + total_predicted_tables: int table_level_acc: float table_detection_recall: float table_detection_precision: float @@ -247,6 +248,7 @@ def process_file(self) -> TableEvaluation: table_acc = 1 if not is_table_predicted else 0 return TableEvaluation( total_tables=0, + total_predicted_tables=len(predicted_table_data), table_level_acc=table_acc, table_detection_recall=score, table_detection_precision=score, @@ -259,6 +261,7 @@ def process_file(self) -> TableEvaluation: if is_table_in_gt and not is_table_predicted: return TableEvaluation( total_tables=len(ground_truth_table_data), + total_predicted_tables=0, table_level_acc=0, table_detection_recall=0, table_detection_precision=0, @@ -294,6 +297,7 @@ def process_file(self) -> TableEvaluation: evaluation = TableEvaluation( total_tables=len(ground_truth_table_data), + total_predicted_tables=len(predicted_table_data), table_level_acc=predicted_table_acc, table_detection_recall=table_detection_recall, table_detection_precision=table_detection_precision,