diff --git a/test_unstructured_ingest/expected-structured-output/azure/IRS-form-1987.pdf.json b/test_unstructured_ingest/expected-structured-output/azure/IRS-form-1987.pdf.json index 80f32e2723..5b219bcba1 100644 --- a/test_unstructured_ingest/expected-structured-output/azure/IRS-form-1987.pdf.json +++ b/test_unstructured_ingest/expected-structured-output/azure/IRS-form-1987.pdf.json @@ -1,7 +1,7 @@ [ { "type": "Title", - "element_id": "8b115710b659086909de658b116dd719", + "element_id": "a4cabe4ff86ebc893bfe677cd98658cb", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -16,7 +16,7 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "a Department of the Treasury Internal Revenue Service Instructions for Form 3115 (Rev. November 1987) Application for Change in Accounting Method" + "text": "aa Department of the Treasury Internal Revenue Service Instructions for Form 3115 (Rev. November 1987) Application for Change in Accounting Method" }, { "type": "NarrativeText", @@ -153,7 +153,7 @@ }, { "type": "NarrativeText", - "element_id": "fdb8017fc73bdc12f7200dece8b76c99", + "element_id": "c6de680970d2536e2807f12e00e3de81", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -168,7 +168,7 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "File this form to request a change in your accounting method, including the accounting treatment of any item. If you are requesting a change in accounting period, use Form 1128, Application for Change in Accounting Period. For more information, see Publication 538, Accounting Periods and Methods." + "text": "File this form to request a change in your accounting method, including the accounting treatment of any item. If you are requesting a change In accounting period, use Form 1128, Application for Change in Accounting Period. For more information, see Publication 538, Accounting Periods and Methods." }, { "type": "UncategorizedText", @@ -210,7 +210,7 @@ }, { "type": "NarrativeText", - "element_id": "7e3ae97a65f12ef0bb8b4d6b5f721f54", + "element_id": "d3c76e8e037d3aec863db1d768b81f6d", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -225,11 +225,11 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "filing taxpayers are reminded to determine if IRS has published a ruling or procedure dealing with the specific type of change since November 1987 (the current revision date of Form 3115)," + "text": "filing taxpayers are reminded to determine if IRS has published a ruling or procedure dealing with the specific type of change since November 1987 (the current revision date of Form 3115)." }, { "type": "Title", - "element_id": "093856d810a56c1557ce2b24c65abf3d", + "element_id": "1f32b4d1bbe309ba7afbef5a988c1b97", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -244,7 +244,7 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "Long-term contracts. —If" + "text": "Long-term contracts.—If" }, { "type": "NarrativeText", @@ -286,7 +286,7 @@ }, { "type": "NarrativeText", - "element_id": "6272a6df76820c927d081a1041e3c079", + "element_id": "3d1a1ef1d14ac329f7bb45f91bc78ab2", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -301,7 +301,7 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "Other methods.—Unless the Service has published a regulation or procedure to the contrary, all other changes !n accounting methods required by the Act are automatically considered to be approved by the Commissioner. Examples of method changes automatically approved by the Commissioner are those changes required to effect: (1) the repeal of the reserve method for bad debts of taxpayers other than financial institutions (Act section 805); (2) the repeal of the installment method for sales under a revolving credit plan (Act section 812); (3) the Inclusion of income attributable to the sale or furnishing of utility services no later than the year In which the services were provided to customers (Act section 821); and (4) the repeal of the deduction for qualified discount coupons (Act section 823). Do not file Form 3115 for these" + "text": "Other methods.—Unless the Service has published a regulation or procedure to the contrary, all other changes !n accounting methods required by the Act are automatically considered to be approved by the Commissioner. Examples of method changes automatically approved by the Commissioner are those changes required to effect: (1) the repeal of the reserve method for bad debts of taxpayers other than financial institutions (Act section 805); (2) the repeal of the installment method for sales under a revolving credit plan (Act section 812); (3) the inclusion of income attributable to the sale or furnishing of utility services no later than the year In which the services were provided to customers (Act section 821); and (4) the repeal of the deduction for qualified discount coupons (Act section 823). Do not file Form 3115 for these" }, { "type": "Title", @@ -324,7 +324,7 @@ }, { "type": "NarrativeText", - "element_id": "faf2673a7d6b6f7c5bf7cae6770a4130", + "element_id": "79072c203dc7f83a12d299f46e1940d3", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -339,11 +339,11 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "Generally, applicants must complete Section In addition, complete the appropriate sections (B-1 through H) for which a change Is desired." + "text": "Generally, applicants must complete Section In addition, complete the appropriate sections through H) for which a change !s desired." }, { "type": "UncategorizedText", - "element_id": "e53657178cb6855ac4b2029197a64b0c", + "element_id": "f9e422a842642faeae2be2f01739c08c", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -358,7 +358,7 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "A." + "text": "A. (B-1" }, { "type": "NarrativeText", @@ -381,7 +381,7 @@ }, { "type": "NarrativeText", - "element_id": "10626f80b0f7b25e661f8f82f5d7c454", + "element_id": "e20bc44cd2b2835c2a309c456be82dc9", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -396,7 +396,7 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "State whether you desire a conference in National Office if the Service proposes to disapprove your application." + "text": "State whether you desire a conference In National Office if the Service proposes to disapprove your application." }, { "type": "Title", @@ -438,7 +438,7 @@ }, { "type": "NarrativeText", - "element_id": "582deac2def308ecc5250773e1683052", + "element_id": "93fbf5303184ecb1856a70570693454c", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -453,11 +453,11 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "Uniform capitalization rules and limitation on cash method.—If you are required to change your method of accounting under section,263A (relating to the capitalization and inclusion in inventory costs of certain expenses) or 448 (limiting the use of the cash method of accounting by certain taxpayers) as added by the Tax Reform Act of 1986 (“Act”), the change 1s treated as initiated by the taxpayer, approved by the Commissioner, and the period for taking the adjustments under section 481(a) into account will not exceed 4 years. (Hospitals required to change from the cash method under section 448 have 10 years to take the adjustrnents into account.) Complete Section A and the appropriate sections (B-1 or C and D) for which the change is required." + "text": "Uniform capitalization rules and limitation on cash method.—lf you are required to change your method of accounting under section,263A (relating to the capitalization and inclusion in inventory costs of certain expenses) or 448 (limiting the use of the cash method of accounting by certain taxpayers) as added by the Tax Reform Act of 1986 (“Act”), the change is treated as initiated by the taxpayer, approved by the Commissioner, and the period for taking the adjustments under section 481(a) into account will not exceed 4 years. (Hospitals required to change from the cash method under section 448 have 10 years to take the adjustrnents into account.) Complete Section A and the appropriate sections (B-1 or C and D) for which the change is required." }, { "type": "NarrativeText", - "element_id": "550f9e99054c657264fb9bb26d3023de", + "element_id": "5109bb5c3ab0b7dc4b0c9bb275247942", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -472,7 +472,7 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "Disregard the instructions under Time and Place for Filing and Late Applications. Instead, attach Form 3115 to your income tax return for the year of change; do not file it separately. Also include on a separate statement accompanying the Form 3115 the period over which the section 481(a) adjustment will be taken into account and the basis for that conclusion. Identify the automatic change being made at the top of page 1 of Form 3115 (e.g., “Automatic Change to Accrual Method—Section 448\"). See Temporary Regulations sections 1.263A-1T and 1.448-1T for additional information." + "text": "Disregard the instructions under Time and Place for Filing and Late Applications. Instead, attach Form 3115 to your income tax return for the year of change; do not file it separately. Also include on a separate statement accompanying the Form 3115 the period over which the section 481(a) adjustment will be taken into account and the basis for that conclusion. Identify the automatic change being made at the top of page 1 of Form 3115 (e.g., “Automatic Change to Accrual Method—Section 448”). See Temporary Regulations sections 1.263A-1T and 1.448-1T for additional information." }, { "type": "Title", @@ -666,7 +666,7 @@ }, { "type": "NarrativeText", - "element_id": "8605ee209656c311cec7ce4b001caab2", + "element_id": "ec3c2d03b846d2a186fc9a8f318f688b", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -681,11 +681,11 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "Individuals.—An individual should enter his or her social security number in this block. If the application is made on behalf of a husband and wife who file their income tax return jointly, enter the social security numbers of both." + "text": "Individuals. —An individual should enter his or her social security number in this block. If the application is made on behalf of a husband and wife who file their income tax return jointly, enter the social security numbers of both." }, { "type": "NarrativeText", - "element_id": "7d82c5876c5c1a3596338ae8cfbd1a50", + "element_id": "b7dbbe92002f45ee479e8a71df57370a", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -700,7 +700,7 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "Others.-—The employer identification number applicant other than an individual should be entered in this block." + "text": "Others.-—The employer identification number applicant other than an individual should be entered tn this block." }, { "type": "Title", @@ -856,7 +856,7 @@ }, { "type": "NarrativeText", - "element_id": "52e2b8e4b8527ae448e9db2dfd0c43c7", + "element_id": "ffb9aff0c22a1c0e946a92401aaf99b7", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -871,7 +871,7 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "Preparer other than partner, officer, etc.—The signature of the individual preparing the application should appear in the space provided on page 6." + "text": "Preparer other than partner, officer, etc. —The signature of the individual preparing the application should appear in the space provided on page 6." }, { "type": "Title", @@ -1008,7 +1008,7 @@ }, { "type": "NarrativeText", - "element_id": "9eefeb9556d95a8dd563ff3270cae7f4", + "element_id": "2653a3fbca8e1961ec4795eec6b976d0", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -1023,11 +1023,11 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "Item 6, page 2.—The term “gross receipts” includes total sales (net of returns and allowances) and all amounts received for services. In addition, gross receipts include any income from investments and from incidental or outside sources (e.g., interest, dividends, rents, royalties, and annuities). However, if you are a resaler of personal property, exclude from gross receipts any amounts not derived in the ordinary course of a trade or business. Gross receipts do not include amounts received for sales taxes if, under the applicable state or local law, the tax is legally imposed on the purchaser of the good or service, and the taxpayer merely collects and remits the tax to the taxing authority." + "text": "Item 6, page 2.—The term “gross receipts” includes total sales (net of returns and allowances) and all amounts received for services. In addition, gross receipts include any income from investments and from incidental or outside sources (e.g., interest, dividends, rents, royalties, and annuities), However, if you area resaler of personal property, exclude from gross receipts any amounts not derived in the ordinary course of a trade or business. Gross receipts do not include amounts received for sales taxes if, under the applicable state or local law, the tax is legally imposed on the purchaser of the good or service, and the taxpayer merely collects and remits the tax to the taxing authority." }, { "type": "NarrativeText", - "element_id": "3e63f740940cd3ab94c17d2bbf48b13a", + "element_id": "372359d8718b28cc34e7a5f1fdd05213", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -1042,11 +1042,11 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "Item 7b, page 2.—If item 7b 1s “Yes,” indicate on a separate sheet the following for each separate trade or business: Nature of business" + "text": "Item 7b, page 2.—If item 7b 1s “Yes,” indicate ona separate sheet the following for each separate trade or business: Nature of business" }, { "type": "NarrativeText", - "element_id": "3db206c935841c3dcd5b3a1d41e56b84", + "element_id": "990c59561e83ad83280a26812cc33ab0", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -1061,7 +1061,7 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "(manufacturing, retailer, wholesaler, etc.), employer identification number, overall method of accounting, and whether, in the last 6 years, that business has changed its accounting method, or is also changing its accounting method as part of this request or as a separate request." + "text": "(manufacturing, retailer, wholesaler, etc.), employer identification number, overall method of accounting, and whether, in the last 6 years, that business has changed its accounting method, or Is also changing its accounting method as part of this request or as a separate request." }, { "type": "NarrativeText", @@ -1217,7 +1217,7 @@ }, { "type": "NarrativeText", - "element_id": "751abc8c6a0fa412c3e8c18345f57f95", + "element_id": "638649dc63a640c24cf9ba21bebffa81", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -1232,7 +1232,7 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "Item 13, page 2.—Insert the actual number of tax years. Use of the term “since inception” 1s not acceptable. However, “more than 6 years” Is acceptable." + "text": "Item 13, page 2.—Insert the actual number of tax years. Use of the term “since inception” is not acceptable. However, “more than 6 years” Is acceptable" }, { "type": "Title", @@ -1255,7 +1255,7 @@ }, { "type": "NarrativeText", - "element_id": "e4a695ea83818204438fe08add6d1554", + "element_id": "ac99f69fcde53d48fdece4f7dc2a6a81", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -1270,7 +1270,7 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "Item 1b, page 2.—Include any amounts reported as income ina prior year although the income had not been accrued (earned) or received in the prior year; for example, discount on installment loans reported as income for the year in which the loans were made instead of for the year or years in which the income was received or earned. Advance payments under Rev. Proc. 71-21 or Regulations section 1.451-5 must be fully explained and all pertinent information must be submitted with this application." + "text": "Item 1b, page 2.—Include any amounts reported as income ina prior year although the income had not been accrued (earned) or received in the prior year; for example, discount on installment loans reported as income for the year In which the loans were made instead of for the year or years in which the income was received or earned. Advance payments under Rev. Proc. 71-21 or Regulations section 1.451-5 must be fully explained and all pertinent information must be submitted with this application." }, { "type": "Title", @@ -1293,7 +1293,7 @@ }, { "type": "NarrativeText", - "element_id": "eac562ca19f6198691856c695e2790bd", + "element_id": "b30d1c850b531b1140d260accc648823", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -1308,7 +1308,7 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "Limitation on the Use of the Cash Method of Accounting. —Except as provided below, C corporations, partnerships with a C corporation as a partner, and tax shelters may not use the cash method of accounting. For purposes of this limitation, a trust subject to the tax on unrelated business income under section 511 1s treated as aC corporation with respect to its unrelated trade or business activities." + "text": "Limitation on the Use of the Cash Method of Accounting. —Except as provided below, C corporations, partnerships with a C corporation as a partner, and tax shelters may not use the cash method of accounting. For purposes of this limitation, a trust subject to the tax on unrelated business income under section 511 Is treated as aC corporation with respect to its unrelated trade or business activities." }, { "type": "NarrativeText", @@ -1331,7 +1331,7 @@ }, { "type": "NarrativeText", - "element_id": "69bd87b2ad5873c030748e62adf61b89", + "element_id": "34da6cfdb7e012ccc7d509546e90cbde", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -1346,11 +1346,11 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "(1) Farming businesses.—F or this purpose, the term “farming business” 1s defined in section 263A(e)(4), but it also includes the raising, harvesting, or growing of trees to which section 263A(c)(5) applies. Notwithstanding this exception, section 447 requires certain C corporations and partnerships with a C corporation as a partner to use the accrual method." + "text": "(1) Farming businesses. —F or this purpose, the term “farming business” 1s defined in section 263A(e)(4), but it also includes the raising, harvesting, or growing of trees to which section 263A(c)(5) applies. Notwithstanding this exception, section 447 requires certain C corporations and partnerships with a C corporation as a partner to use the accrual method." }, { "type": "NarrativeText", - "element_id": "df67e4b3a4a1352209c2648b87d675e2", + "element_id": "a399c3290c0a44657dd19be7aacb3f75", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -1365,7 +1365,7 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "(2) Qualified personal service corporations. — A “qualified personal service corporation” is any corporation: (a) substantially all of the activities of which involve the performance of services in the fields of health, law, engineering, architecture, accounting, actuarial science, performing arts, or consulting, and (b)" + "text": "(2) Qualified personal service corporations. — A “qualified personal service corporation” !s any corporation: (a) substantially all of the activities of which involve the performance of services in the fields of health, law, engineering, architecture, accounting, actuarial science, performing arts, or consulting, and (b)" }, { "type": "Title", @@ -1445,7 +1445,7 @@ }, { "type": "NarrativeText", - "element_id": "b68a5b5b0d59122e0df42a96d68d2b5e", + "element_id": "722d8f1e800a403021e2e339cc935b0c", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -1460,7 +1460,7 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "(3) Entities with gross receipts of $5,000,000 or less. —To qualify for this exception, the C corporation's or partnership’s annual average gross receipts for the three years ending with the prior tax year may not exceed $5,000,000. If the corporation or partnership was not in existence for the entire 3-year period, the period of existence is used to determine whether the corporation or partnership qualifies. If any tax year in the 3-year period is a short tax year, the corporation or partnership must annualize the gross receipts by multiplying the gross receipts by 12 and dividing the result by the number of months in the short period." + "text": "(3) Entities with gross receipts of $5,000,000 or less. —To qualify for this exception, the C corporation's or partnership’s annual average gross receipts for the three years ending with the prior tax year may not exceed $5,000,000. If the corporation or partnership was not in existence for the entire 3-year period, the period of existence is used to determine whether the corporation or partnership qualifies. If any tax year in the 3-year period is a short tax year, the corporation or partnership must annualize the gross receipts by multiplying the gross receipts by 12 and dividing the result by the number of months tn the short period." }, { "type": "NarrativeText", @@ -1521,7 +1521,7 @@ }, { "type": "NarrativeText", - "element_id": "7e90b155b5cdb2481b1dfbb1118142c5", + "element_id": "b54e5aba8343634d50577f3e0858299f", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -1536,7 +1536,7 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "Inventories of retail merchants.—The retail method of pricing inventories does not contemplate valuation of goods at the retail selling price. The retail selling price of goods on hand must be reduced to approximate cost or cost or market, whichever Is lower, by the adjustments required in Regulations section 1.471-8." + "text": "Inventories of retail merchants. —The retail method of pricing inventories does not contemplate valuation of goods at the retail selling price. The retail selling price of goods on hand must be reduced to approximate cost or cost or market, whichever is lower, by the adjustments required in Regulations section 1.471-8." }, { "type": "NarrativeText", @@ -1578,7 +1578,7 @@ }, { "type": "NarrativeText", - "element_id": "347f638641329c72c971a522ec07f6b1", + "element_id": "f7d93af7606417ddb48746b57fc91f61", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -1593,7 +1593,7 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "(1) The specific types and classes of goods in the LIFO inventories involved in the proposed changes and the comparative value of such Inventories as of the end of the tax year preceding the year of change determined by: (a) the LIFO method, and (b) the proposed method and basis (such as FIFO cost or lower of cost or market)." + "text": "(1) The specific types and classes of goods in the LIFO inventories involved in the proposed changes and the comparative value of such inventories as of the end of the tax year preceding the year of change determined by: (a) the LIFO method, and (b) the proposed method and basis (such as FIFO cost or lower of cost or market)." }, { "type": "Title", @@ -1635,7 +1635,7 @@ }, { "type": "NarrativeText", - "element_id": "aca21cfeadca7d527dd36f01005ff44a", + "element_id": "f593d70048f37eb6523965b029e1f4c9", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -1650,7 +1650,7 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "(2) proposed and valuation methods conform to the inventory method currently used with respect to non-LIFO Inventories, if any, or how such method is otherwise consistent with Regulations section 1.4726." + "text": "(2) proposed and valuation methods conform to the inventory method currently used with respect to non-LIFO inventories, if any, or how such method 1s otherwise consistent with Regulations section 1.472-6." }, { "type": "NarrativeText", @@ -1709,25 +1709,6 @@ }, "text": "Section D" }, - { - "type": "Title", - "element_id": "663ea1bfffe5038f3f0cf667f14c4257", - "metadata": { - "data_source": { - "url": "abfs://container1/IRS-form-1987.pdf", - "version": "307846589923949318200712033143133817358", - "record_locator": { - "protocol": "abfs", - "remote_file_path": "container1/IRS-form-1987.pdf" - }, - "date_created": "2023-03-10T09:36:30+00:00", - "date_modified": "2023-03-10T09:36:30+00:00" - }, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "to" - }, { "type": "Title", "element_id": "7574058dd32c12eb33bc649b5e36bdcb", @@ -1749,26 +1730,7 @@ }, { "type": "NarrativeText", - "element_id": "3167823c1d2039b4c48efe2f6c89b5c2", - "metadata": { - "data_source": { - "url": "abfs://container1/IRS-form-1987.pdf", - "version": "307846589923949318200712033143133817358", - "record_locator": { - "protocol": "abfs", - "remote_file_path": "container1/IRS-form-1987.pdf" - }, - "date_created": "2023-03-10T09:36:30+00:00", - "date_modified": "2023-03-10T09:36:30+00:00" - }, - "filetype": "application/pdf", - "page_number": 2 - }, - "text": "Applicants requesting change valuing property produced, property acquired for resale, or long-term contracts under section 263A or 460 MUST complete section D showing the treatment under both the present and proposed methods." - }, - { - "type": "UncategorizedText", - "element_id": "bbf3f11cb5b43e700273a78d12de55e4", + "element_id": "743ae5860daf07620dad25a5e98751a4", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -1783,7 +1745,7 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "%" + "text": "Applicants requesting to change valuing property produced, property acquired for resale, or long-term contracts under section 263A or 460 MUST complete section D showing the treatment under both the present and proposed methods." }, { "type": "NarrativeText", @@ -1825,7 +1787,7 @@ }, { "type": "NarrativeText", - "element_id": "86fab9f7b35d56a2d48baf0782b7c53d", + "element_id": "eb6d37436a954d25b9bcd202cfd6f6bc", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -1840,7 +1802,7 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "Section 460(f) provides that the term “long-term contract” means any contract for the manufacturing, building, installation, or construction of property that is not completed within the tax year in which it 1s entered into. However, a manufacturing contract will not qualify as a long-term contract unless the contract involves the manufacture of: (1) a unique item not normally included in your finished goods inventory, or (2) any item that normally requires more than 12 calendar months to complete." + "text": "Section 460(f) provides that the term “long-term contract” means any contract for the manufacturing, building, installation, or construction of property that is not completed within the tax year in which it is entered into. However, a manufacturing contract will not qualify as a long-term contract unless the contract involves the manufacture of: (1) a unique item not normally included in your finished goods inventory, or (2) any item that normally requires more than 12 calendar months to complete." }, { "type": "NarrativeText", @@ -1901,7 +1863,7 @@ }, { "type": "NarrativeText", - "element_id": "cf5e2bc86b7c77533924eb940fd522d5", + "element_id": "cb3ed76a5c72bf690504075dc93b4189", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -1916,11 +1878,11 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "This section Is to be used only to request a change in a method of accounting for depreciation under section 167." + "text": "This section is to be used only to request a change in a method of accounting for depreciation under section 167." }, { "type": "NarrativeText", - "element_id": "b8355dc568ea042f9da586188b404bca", + "element_id": "86fb13da8dea0952af182c3e0d5fcc47", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -1935,11 +1897,11 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "Rev. Proc. 74-11 provides a procedure whereby applicants are considered to have obtained the consent of the Commissioner to change their method of accounting for depreciation. You must file Form 3115 with the Service Center where your return will be filed within the first 180 days of the tax year in which it is desired to make the change. Attach a copy of the form to the income tax return for the tax year of the change." + "text": "Rev. Proc. 74-11 provides a procedure whereby applicants are considered to have obtained the consent of the Commissioner to change their method of accounting for depreciation. You must file Form 3115 with the Service Center where your return will be filed within the first 180 days of the tax year in which it is desired to make the change. Attach. copy of the form to the income tax return for the tax year of the change." }, { "type": "NarrativeText", - "element_id": "319882ba6726e29222f5522c53887960", + "element_id": "f583c2740e1a29d03b9d740d1ba9cc5e", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -1954,7 +1916,7 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "Note: Do not use Form 3115 to make an election under section 168. Such an election may be made only on the tax return for the year in which the property 1s placed in service. In addition, Form 3115 is not to be used to request approval to revoke an election made under section 168. Such a request must be made in accordance with Rev. Proc. 87-1 (updated annually)." + "text": "Note: Do not use Form 3115 to make an election under section 168. Such an election may be made only on the tax return for the year in which the property !s placed in service. In addition, Form 3115 is not to be used to request approval to revoke an election made under section 168. Such a request must be made in accordance with Rev. Proc. 87-1 (updated annually)." }, { "type": "Title", @@ -1977,7 +1939,7 @@ }, { "type": "NarrativeText", - "element_id": "cb1f664a186a87f6560cde136d70b558", + "element_id": "ead1e289dd392d5cec04a361a4c1fc89", "metadata": { "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", @@ -1992,7 +1954,7 @@ "filetype": "application/pdf", "page_number": 2 }, - "text": "Generally, this section should be used for requesting changes In a method of accounting for which provision has not been made elsewhere on this form. Attach additional pages if more space ts needed for a full explanation of the present method used and the proposed change requested." + "text": "Generally, this section should be used for requesting changes in a method of accounting for which provision has not been made elsewhere on this form. Attach additional pages if more space ts needed for a full explanation of the present method used and the proposed change requested." }, { "type": "NarrativeText", diff --git a/test_unstructured_ingest/expected-structured-output/biomed-api/65/11/main.PMC6312790.pdf.json b/test_unstructured_ingest/expected-structured-output/biomed-api/65/11/main.PMC6312790.pdf.json index c60424db2a..0d20e48fdd 100644 --- a/test_unstructured_ingest/expected-structured-output/biomed-api/65/11/main.PMC6312790.pdf.json +++ b/test_unstructured_ingest/expected-structured-output/biomed-api/65/11/main.PMC6312790.pdf.json @@ -59,16 +59,6 @@ }, "text": "Data on environmental sustainable corrosion inhibitor for stainless steel in aggressive environment" }, - { - "type": "Title", - "element_id": "c21a7f75a507e8d1d940e30b66575616", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 1 - }, - "text": "(Jee" - }, { "type": "Title", "element_id": "6d1999c49562bd7c2b15a41327b8fc36", @@ -429,16 +419,6 @@ }, "text": "Fig. 2. Corrosion rate versus exposure time for stainless steel immersed in 0.5 M H2SO4 solution in the absence and presence of ES." }, - { - "type": "UncategorizedText", - "element_id": "57e2eb94df928d0cf17b2c0d41ae042e", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 3 - }, - "text": "100 4" - }, { "type": "UncategorizedText", "element_id": "ad57366865126e55649ecb23ae1d4888", @@ -521,13 +501,13 @@ }, { "type": "Image", - "element_id": "aa4f0eca72d0603d384878e68fe5be57", + "element_id": "506dff384be7ac4026b4227e860b3a39", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 4 }, - "text": "5 1 os = — 10; =o ° © —\" 205 i —~é é —ip a5 — Control -2 — & 2.5 T T T 0.0000001 + —-0.00001 0.001 O14 Current Density (A/cm2)" + "text": "25 14 os ~ — 1 2 0 i = —4 Zs 4 8 — bg 14 é — 2g 137 — Control 24 8g 25 T T T 0.0000001 0.00001 0.001 01 Current Density (A/cm2)" }, { "type": "FigureCaption", @@ -599,16 +579,6 @@ }, "text": "C/0" }, - { - "type": "UncategorizedText", - "element_id": "e2b6d7e2ab125149fa820500cedfffbb", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 4 - }, - "text": "—=—Cc/0" - }, { "type": "FigureCaption", "element_id": "a24d672821f7acb0bbe2c8a813debe16", @@ -631,13 +601,13 @@ }, { "type": "Image", - "element_id": "273fb301b173075f79b2cbdab962e2ff", + "element_id": "59b3c54b48b40dffd68bd4d3e1859e95", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 5 }, - "text": "SEM HV: Q0KY WD: 14.89 rmrm ‘9EM MAO: 209 x Det: DOE Pectomsence In nanospact" + "text": "SEM HV: 20.0KV 9D: 14.90 men ‘DEM MAO: 209 x Det: DOE 260 om Pectormence In nanos pac:" }, { "type": "FigureCaption", @@ -661,13 +631,13 @@ }, { "type": "Image", - "element_id": "520d1da08c86ce165cd2843e2dc27f98", + "element_id": "032df41d39ff55ef057e900ef83bad04", "metadata": { "data_source": {}, "filetype": "application/pdf", "page_number": 5 }, - "text": "SEMHV: 20.0KV WD: 15.54 mm EM ING: ACO x Dei: OSE" + "text": "SEM HY: 70.0KV SEM IAAG: 400 x" }, { "type": "FigureCaption", @@ -739,6 +709,16 @@ }, "text": "Austenitic stainless steel Type 316 was used in this study with chemical composition reported in [1,2]. The chemicals used were of annular grade. The inhibitor concentrations are in the range of 2, 4, 6, 8 and 10 g [3–5]. The structural formula of egg shell powder is shown in Fig. 9." }, + { + "type": "Image", + "element_id": "ee7729e0ad3c974c68a2b6bc1f09378a", + "metadata": { + "data_source": {}, + "filetype": "application/pdf", + "page_number": 6 + }, + "text": "Tie} = O iy oH H3;COCHN™ OH" + }, { "type": "FigureCaption", "element_id": "389bd6e22f3ac105897fa0a75807197d", @@ -929,6 +909,16 @@ }, "text": "steps of the linear polarization plot are substituted to get corrosion current. Nova software was used with linear polarization resistance (LPR) and the current was set to 10 mA (maximum) and 10 nA (minimum). LSV staircase parameter start potential (cid:3) 1.5 v, step potential 0.001 m/s and stop potential of þ1.5 v set was used in this study." }, + { + "type": "Title", + "element_id": "c9015d53b90846454375a2fdf2829c66", + "metadata": { + "data_source": {}, + "filetype": "application/pdf", + "page_number": 7 + }, + "text": "Acknowledgements" + }, { "type": "Title", "element_id": "ee7d6fc036b5c1d6c5f5ebb9bf533f01", diff --git a/test_unstructured_ingest/expected-structured-output/biomed-api/75/29/main.PMC6312793.pdf.json b/test_unstructured_ingest/expected-structured-output/biomed-api/75/29/main.PMC6312793.pdf.json index c4b3225805..f2676bbf7f 100644 --- a/test_unstructured_ingest/expected-structured-output/biomed-api/75/29/main.PMC6312793.pdf.json +++ b/test_unstructured_ingest/expected-structured-output/biomed-api/75/29/main.PMC6312793.pdf.json @@ -59,16 +59,6 @@ }, "text": "A benchmark dataset for the multiple depot vehicle scheduling problem" }, - { - "type": "Title", - "element_id": "77b037daa0a8a3f7349bd57dda36499f", - "metadata": { - "data_source": {}, - "filetype": "application/pdf", - "page_number": 1 - }, - "text": "(eee" - }, { "type": "Title", "element_id": "9d8efece3117b2eec928f8ee4d4888e4", diff --git a/test_unstructured_ingest/expected-structured-output/local-single-file-with-pdf-infer-table-structure/layout-parser-paper.pdf.json b/test_unstructured_ingest/expected-structured-output/local-single-file-with-pdf-infer-table-structure/layout-parser-paper.pdf.json index 3bec9e8600..3c79f7492b 100644 --- a/test_unstructured_ingest/expected-structured-output/local-single-file-with-pdf-infer-table-structure/layout-parser-paper.pdf.json +++ b/test_unstructured_ingest/expected-structured-output/local-single-file-with-pdf-infer-table-structure/layout-parser-paper.pdf.json @@ -511,7 +511,7 @@ }, { "type": "Image", - "element_id": "2326155e29fbcc80862533eba5d9c75c", + "element_id": "0ef9d50781a8637826772ff44e47f462", "metadata": { "data_source": { "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf", @@ -524,7 +524,7 @@ "filetype": "application/pdf", "page_number": 4 }, - "text": "Efficient Data Annotation Model Customization Document Images Community Platform ‘a >) ¥ DIA Model Hub i .) Customized Model Training] == | Layout Detection Models | ——= DIA Pipeline Sharing ~ OCR Module = { Layout Data stuctue ) = (storage Visualization VY" + "text": "Model Customization Document Images Community Platform Efficient Data Annotation ¥ DIA Model Hub Customized Model Training) == | Layout Detection Models | <== DIA Pipeline Sharing OCR Module S— | Layout Data Structure | === | Storage & Visualization A u r Libran" }, { "type": "NarrativeText", @@ -659,7 +659,7 @@ }, "filetype": "application/pdf", "page_number": 5, - "text_as_html": "
Dataset| Base Model'|Notes
PubLayNet B8]|F/MLayouts of modern scientific documents
PRImAMnned modern magazines and scientific reports
NewspapeiFcanned US newspapers from the 20th century
TableBankFTable region on modern scientific and business document
HJDatasetF/MLayouts of history Japanese documents
" + "text_as_html": "
Dataset| Base Mode!'|| Notes
PubLayNet 38]|F/MLayouts of modern scientific documents
PRInA BJMLayouts of scanned modern magazines and scientific reports
NewspaperFLayouts of scanned US newspapers from the 20th century
TableBankFTable region on modern scientific and business document
HJDataset BT]F/MLayouts of history Japanese documents
" }, "text": "Base Model1 Large Model Notes Dataset PubLayNet [38] PRImA [3] Newspaper [17] TableBank [18] HJDataset [31] F / M M F F F / M M - - F - Layouts of modern scientific documents Layouts of scanned modern magazines and scientific reports Layouts of scanned US newspapers from the 20th century Table region on modern scientific and business document Layouts of history Japanese documents" }, @@ -801,7 +801,7 @@ }, { "type": "Image", - "element_id": "3b30176246b01e00c3051a7e2a11669c", + "element_id": "553c63e448f250b7466cdca0d5058f24", "metadata": { "data_source": { "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf", @@ -814,7 +814,7 @@ "filetype": "application/pdf", "page_number": 6 }, - "text": "- ° . 3 a a 4 a 3 oo er ‘ 2 § 8 a 8 3 3 ‘ £ 4 A g a 9 ‘ 3 ¥ Coordinate g 4 5 3 + § 3 H Extra Features [O=\") [Bo] eaing i Text | | Type | | ower ° & a ¢ o [ coordinatel textblock1, 3 3 ’ g Q 3 , textblock2 , layoutl ] 4 q ® A list of the layout elements Ff" + "text": "0 3 B Rectangle Qvodilateral S 4 9 3 8 s > o oy ° vy 3 Coordinate § a g 3 + i} a HY Block] [Block] [Reading] 4 $ Extra features | {ON | |e ote £ o w a c o [ coordinatel textblock1 | 3 ve , ut 3 i lock2 1. 1 Ey oy textbloc! , ayoutl ] g q @ A list of the layout elements 4" }, { "type": "FigureCaption", @@ -1085,7 +1085,7 @@ }, "filetype": "application/pdf", "page_number": 8, - "text_as_html": "
block.pad(top, bottom,right,left)Enlarge the current block according to the input
block.scale(fx, fy)Scale the current block given the ratio ion in x and y di
block.shift(dx, dy)Move the current block with the shift distances in x and y direction
block1.is_in(block2)Whether block] is inside of block2
; block1. intersect (block2)Return the intersection region of block and block2. . . . Coordinate type to be determined based on the inputs.
; block1.union(block2)Return the union region of block1 and block2. . . . Coordinate type to be determined based on the inputs.
block1.relative_to(block2)Convert the absolute coordinates of block to ' ' relative coordinates to block2
. block1.condition_on(block2)Calculate the absolute coordinates of block1 given . the canvas block2’s absolute coordinates
block. crop_image (image)Obtain the image segments in the block region
" + "text_as_html": "
block.pad(top, bottom,right,left) |Enlarge the current block according to the input
block.scale(fx, fy)Scale the current block given the ratio . ; ; in x and y direction
. block.shift(dx, dy)Move the current block with the shift distances in x and y direction
block1.is_in(block2)Whether block] is inside of block2
block1. intersect (block2)Return the intersection region of block1 and block2. . . . Coordinate type to be determined based on the
. block1.union(block2)Return the union region of block1 and block2. . . . Coordinate type to be determined based on the
. block1.relative_to(block2)Convert the absolute coordinates of block1 to . . relative coordinates to block2
block1.condition_on(block2)Calculate the absolute coordinates of block1 given 7 . the canvas block2’s absolute coordinates
block. crop_image (image)Obtain the image segments in the block region
" }, "text": "Operation Name Description block.pad(top, bottom, right, left) Enlarge the current block according to the input Scale the current block given the ratio in x and y direction block.scale(fx, fy) Move the current block with the shift distances in x and y direction block.shift(dx, dy) Whether block1 is inside of block2 block1.is in(block2) Return the intersection region of block1 and block2. Coordinate type to be determined based on the inputs. block1.intersect(block2) Return the union region of block1 and block2. Coordinate type to be determined based on the inputs. block1.union(block2) Convert the absolute coordinates of block1 to relative coordinates to block2 block1.relative to(block2) Calculate the absolute coordinates of block1 given the canvas block2’s absolute coordinates block1.condition on(block2) Obtain the image segments in the block region block.crop image(image)" }, @@ -1193,7 +1193,7 @@ }, { "type": "Image", - "element_id": "294441b6458d8a005ea7588ecb6efc10", + "element_id": "f47a514361f98885c44032539defd182", "metadata": { "data_source": { "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf", @@ -1206,7 +1206,7 @@ "filetype": "application/pdf", "page_number": 9 }, - "text": "x09 Burpunog uayor Aeydsiq 1 vondo 10g Guypunog usyoy apir:z uondo Mode I: Showing Layout on the Original Image Mode Il: Drawing OCR'd Text at the Correspoding Position" + "text": "10g Bupunog vayoy fejdsiq :, uondo 10g 6upunog uayo4 apit 7 vondo Mode I: Showing Layout on the Original Image Mode II: Drawing OCR'd Text at the Correspoding Position" }, { "type": "NarrativeText", @@ -1295,7 +1295,7 @@ }, { "type": "Image", - "element_id": "6e6e9ba62b25fdfb8734842354a7ce64", + "element_id": "3b10103e6e1a9915917ddaacc2b32a87", "metadata": { "data_source": { "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf", @@ -1308,7 +1308,7 @@ "filetype": "application/pdf", "page_number": 10 }, - "text": "Intra-column reading order Token Categories tie (Adress 2) tee (NE sumber Variable HEE company type Column Categories (J tite we) adaress —_ (7) section Header by ‘e * Column reading order a a (a) Illustration of the original Japanese Maximum Allowed Height BRE B>e EER eR (b) Illustration of the recreated document with dense text structure for better OCR performance" + "text": "Intra-column reading order i s & s e Number Column reading order Variable Company Type | L st id fa | f fs - i. il (a) Illustration of the original Japanese document with detected layout elements highlighted in colored boxes = Column Categories z (J tite = Ei r f| Sah i mai — = 3 YW a2 mx ia 2 Ae ion Hea g i 2 ae section Header & H 4 fe § i Ls ie & 3 (b) Illustration of the recreated document with dense text structure for better OCR performance" }, { "type": "NarrativeText", @@ -1482,7 +1482,7 @@ }, { "type": "Image", - "element_id": "e55a2d8ec9ea5f1d6c11788f33f2b97d", + "element_id": "38ec7bb20e004337f270a2753a9c1672", "metadata": { "data_source": { "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf", @@ -1495,7 +1495,7 @@ "filetype": "application/pdf", "page_number": 11 }, - "text": "(spe peepee, ‘Active Learning Layout Annotate Layout Dataset | + ‘Annotation Toolkit ¥ a Deep Leaming Layout Model Training & Inference, ¥ ; Handy Data Structures & Post-processing El Apis for Layout Det a LAR ror tye eats) 4 Text Recognition | <—— Default ane Customized ¥ ee Layout Structure Visualization & Export | <—— | visualization & Storage The Japanese Document Helpful LayoutParser Digitization Pipeline Modules" + "text": "——EE : Active Learning Layout Annotate Layout Dataset | + | annotation Toolkit ¥ ¥ Layout Detection Deep Learning ‘ieee Model Training & Inferenc an Handy Data Structures 2 bis fol Layout Date 4 Text Recognition | <— [ Pe/autand Customized ¥ jee eae rs Visualization & Export | *——\"| Vicyaiization & Storage The Japanese Document Helpful LayoutParser Digitization Pipeline Modules" }, { "type": "NarrativeText", @@ -1703,7 +1703,7 @@ }, { "type": "Image", - "element_id": "7f494d0f1a8170f2ed0da01c039fcbd2", + "element_id": "022b00b724d86f4bf89acd047d4ed816", "metadata": { "data_source": { "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/example-docs/layout-parser-paper.pdf", @@ -1716,7 +1716,7 @@ "filetype": "application/pdf", "page_number": 13 }, - "text": "(@) Partial table at the bottom (&) Full page table (6) Partial table at the top (d) Mis-detected tet line" + "text": "(2) Partial table at the bottom (b) Full page table (c) Partial table at the top (@) Mis-detected text line" }, { "type": "FigureCaption", diff --git a/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/azure/IRS-form-1987.pdf.json b/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/azure/IRS-form-1987.pdf.json index 18efff6c15..c4331a0d3c 100644 --- a/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/azure/IRS-form-1987.pdf.json +++ b/test_unstructured_ingest/expected-structured-output/pdf-fast-reprocess/azure/IRS-form-1987.pdf.json @@ -201,7 +201,7 @@ }, { "type": "NarrativeText", - "element_id": "fdb8017fc73bdc12f7200dece8b76c99", + "element_id": "f56d2e27c162702cf94bde02654de85b", "metadata": { "data_source": { "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/IRS-form-1987.pdf", @@ -217,11 +217,11 @@ ], "page_number": 1 }, - "text": "File this form to request a change in your accounting method, including the accounting treatment of any item. If you are requesting a change in accounting period, use Form 1128, Application for Change in Accounting Period. For more information, see Publication 538, Accounting Periods and Methods." + "text": "File this form to request change in your accounting method, including the accounting treatment of any item. If you are requesting a change accounting period, use Form 1128, Application for Change in Accounting Period. For more information, see Publication 538, Accounting Periods and Methods. a in" }, { "type": "NarrativeText", - "element_id": "45f1ee6a617b92c165698c4456a870fb", + "element_id": "f5c4fd3a66441f170a7d8c2de43b932a", "metadata": { "data_source": { "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/IRS-form-1987.pdf", @@ -237,11 +237,11 @@ ], "page_number": 1 }, - "text": "When filing Form 3115, taxpayers are reminded to determine if IRS has published a ruling or procedure dealing with the specific type of change since November 1987 (the current revision date of Form 3115)," + "text": "When filing Form 3115, taxpayers are reminded to determine if IRS has published ruling or procedure dealing with the specific type of change since November 1987 (the current revision date of Form a" }, { "type": "NarrativeText", - "element_id": "84e7e32f584e2ee9f47ba593bf86c559", + "element_id": "72dae82dc74be0196fa0402563e720e4", "metadata": { "data_source": { "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/IRS-form-1987.pdf", @@ -257,11 +257,11 @@ ], "page_number": 1 }, - "text": "Generally, applicants must complete Section A. In addition, complete the appropriate sections (B-1 through H) for which a change Is desired." + "text": "In Generally, applicants must complete Section A. addition, complete the appropriate sections (B-1 through H) for which change desired. a" }, { "type": "NarrativeText", - "element_id": "bf2a070cb9d03d056e70b26bebf1ef79", + "element_id": "98c346a1639313c912f8d7bb324783b0", "metadata": { "data_source": { "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/IRS-form-1987.pdf", @@ -277,11 +277,11 @@ ], "page_number": 1 }, - "text": "You must give all relevant facts, including a detailed description of your present and proposed methods. You must also state the reason(s) you believe approval to make the requested change should be granted. Attach additional pages if more space is needed for explanations. Each page should show your name, address, and identifying number." + "text": "You must give all relevant facts, including detailed description of your present and proposed methods. You must also state the reason(s) you believe approval to make the requested change should be granted. Attach additional pages if more space is needed for explanations. Each page should show your name, address, and identifying number. a" }, { "type": "NarrativeText", - "element_id": "505160cf4f5ef1cf128734840a8c98d1", + "element_id": "a891f37f83ccf53b32203d1d3ceaf34c", "metadata": { "data_source": { "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/IRS-form-1987.pdf", @@ -297,7 +297,7 @@ ], "page_number": 1 }, - "text": "State whether you desire a conference in the National Office if the Service proposes to disapprove your application." + "text": "State whether you desire conference the National Office if the Service proposes to disapprove your application. a in" }, { "type": "Title", @@ -321,7 +321,7 @@ }, { "type": "NarrativeText", - "element_id": "582deac2def308ecc5250773e1683052", + "element_id": "0b924ea8853fb6b25a8242bbf859ea42", "metadata": { "data_source": { "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/IRS-form-1987.pdf", @@ -337,11 +337,11 @@ ], "page_number": 1 }, - "text": "Uniform capitalization rules and limitation on cash method.—If you are required to change your method of accounting under section,263A (relating to the capitalization and inclusion in inventory costs of certain expenses) or 448 (limiting the use of the cash method of accounting by certain taxpayers) as added by the Tax Reform Act of 1986 (“Act”), the change 1s treated as initiated by the taxpayer, approved by the Commissioner, and the period for taking the adjustments under section 481(a) into account will not exceed 4 years. (Hospitals required to change from the cash method under section 448 have 10 years to take the adjustrnents into account.) Complete Section A and the appropriate sections (B-1 or C and D) for which the change is required." + "text": "Uniform capitalization rules and limitation on cash you are required to change your method of accounting under section,263A (relating to the capitalization and inclusion inventory costs of certain expenses) or 448 (limiting the use of the cash method of accounting by certain taxpayers) as added by the Tax Reform Act of 1986 (“Act”), the change is treated as initiated by the taxpayer, approved by the Commissioner, and the period for taking the adjustments under section 481(a) into account will not exceed 4 years. (Hospitals required to change from the cash method under section 448 have 10 years to take the adjustrnents into account.) Complete Section A and the appropriate sections (B-1 or C and D) for which the change required. in 1s" }, { "type": "NarrativeText", - "element_id": "550f9e99054c657264fb9bb26d3023de", + "element_id": "e1269cc283d0919ca2dff1307c9b6d0c", "metadata": { "data_source": { "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/IRS-form-1987.pdf", @@ -357,11 +357,11 @@ ], "page_number": 1 }, - "text": "Disregard the instructions under Time and Place for Filing and Late Applications. Instead, attach Form 3115 to your income tax return for the year of change; do not file it separately. Also include on a separate statement accompanying the Form 3115 the period over which the section 481(a) adjustment will be taken into account and the basis for that conclusion. Identify the automatic change being made at the top of page 1 of Form 3115 (e.g., “Automatic Change to Accrual Method—Section 448\"). See Temporary Regulations sections 1.263A-1T and 1.448-1T for additional information." + "text": "Disregard the instructions under Time and Place for Filing and Late Applications. Instead, attach Form 3115 to your income tax return for the year of change; do not file it separately. Also include on separate statement accompanying the Form 3115 the period over which the section 481(a) adjustment will be taken into account and the basis for that conclusion. Identify the automatic change being made at the top of page 1 of Form 3115 (e.g., “Automatic Change to Accrual Method—Section See Temporary Regulations sections 1.263A-1T and 1.448-1T for additional information. a" }, { "type": "NarrativeText", - "element_id": "463c7a60fb455863074b2919009e36f1", + "element_id": "b7450c68fb975c05e79a1494a7c9f206", "metadata": { "data_source": { "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/IRS-form-1987.pdf", @@ -377,11 +377,11 @@ ], "page_number": 1 }, - "text": "Long-term contracts. —If you are required to change your method of accounting for long-term contracts under section 460, see Notice 87-61 (9/21/87), 1987-38 IRB 40, for the notification procedures that must be followed." + "text": "Long-term you are required to change your method of accounting for long-term contracts under section 460, see Notice 87-61 (9/21/87), 1987-38 IRB 40, for the notification procedures that must be followed." }, { "type": "NarrativeText", - "element_id": "d30c76a61eda53cddda756c6d370d0e9", + "element_id": "cb7251b240a20591de6b90bd853ca81b", "metadata": { "data_source": { "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/IRS-form-1987.pdf", @@ -397,7 +397,7 @@ ], "page_number": 1 }, - "text": "Other methods.—Unless the Service has published a regulation or procedure to the contrary, all other changes !n accounting methods required by the Act are automatically considered to be approved by the Commissioner. Examples of method changes automatically approved by the Commissioner are those changes required to effect: (1) the repeal of the reserve method for bad debts of taxpayers other than financial institutions (Act section 805); (2) the repeal of the installment method for sales under a revolving credit plan (Act section 812); (3) the Inclusion of income attributable to the sale or furnishing of utility services no later than the year In which the services were provided to customers (Act section 821); and (4) the repeal of the deduction for qualified discount coupons (Act section 823). Do not file Form 3115 for these changes." + "text": "In Other methods.—Unless the Service has published regulation or procedure to the contrary, all other changes !n accounting methods required by the Act are automatically considered to be approved by the Commissioner. Examples of method changes automatically approved by the Commissioner are those changes required to effect: (1) the repeal of the reserve method for bad debts of taxpayers other than financial institutions (Act section 805); (2) the repeal of the installment method for sales under a revolving credit plan (Act section 812); (3) the of income attributable to the sale or furnishing of utility services no later than the year which the services were provided to customers (Act section 821); and (4) the repeal of the deduction for qualified discount coupons (Act section 823). Do not file Form 3115 for these changes. a" }, { "type": "Title", @@ -421,7 +421,7 @@ }, { "type": "NarrativeText", - "element_id": "af8bdf713f162b09567c8d1a3a2d4de7", + "element_id": "5bef4033f85a9c1e341e7ea9ad84a819", "metadata": { "data_source": { "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/IRS-form-1987.pdf", @@ -437,7 +437,7 @@ ], "page_number": 1 }, - "text": "Generally, applicants must file this form within the first 180 days of the tax year in which it is desired to make the change." + "text": "is Generally, applicants must file this form within the first 180 days of the tax year which it desired to make the change. in" }, { "type": "UncategorizedText", @@ -481,7 +481,7 @@ }, { "type": "NarrativeText", - "element_id": "9112783151f904e2500e3f979246fe23", + "element_id": "101fd8a2d6b17910f1394cbdc59ba9ac", "metadata": { "data_source": { "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/IRS-form-1987.pdf", @@ -497,7 +497,7 @@ ], "page_number": 1 }, - "text": "See section 5.03 of Rev. Proc. 84-74 for filing an early application. Note: /f this form is being filed in accordance with Rev. Proc. 74-11, see Section G below." + "text": "is See section 5.03 of Rev. Proc. 84-74 for filing an early application. Note: /f this form being filed accordance with Rev. Proc. 74-11, see Section G below. in" }, { "type": "Title", @@ -521,7 +521,7 @@ }, { "type": "NarrativeText", - "element_id": "02dd043b5686a46b2f03cfe8cf56aae9", + "element_id": "aef150e1ab590eb8f5adeede1951127c", "metadata": { "data_source": { "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/IRS-form-1987.pdf", @@ -537,7 +537,7 @@ ], "page_number": 1 }, - "text": "If your application is filed after the 180-day period, it is late. The application will be considered for processing only upon a showing of “good cause” and if it can be shown to the satisfaction of the Commissioner that granting you an extension will not jeopardize the Government's interests. For further information, see Rev, Proc. 79-63." + "text": "is If your application is filed after the 180-day period, it late. The application will be considered for processing only upon showing of “good cause” and if it can be shown to the satisfaction of the Commissioner that granting you an extension will not jeopardize the Government's interests. For further information, see Rev, Proc. 79-63. a" }, { "type": "Title", @@ -561,7 +561,7 @@ }, { "type": "NarrativeText", - "element_id": "174c82e3aaff00ca222300c67ebf8478", + "element_id": "fb9fd9b6c6070fb56a8207f5baa899a5", "metadata": { "data_source": { "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/IRS-form-1987.pdf", @@ -577,11 +577,11 @@ ], "page_number": 1 }, - "text": "Individuals.—An individual should enter his or her social security number in this block. If the application is made on behalf of a husband and wife who file their income tax return jointly, enter the social security numbers of both. Others.-—The employer identification number of" + "text": "is individual should enter his or her social security number this block. If the application made on behalf of husband and wife who file their income tax return jointly, enter the social security numbers of both. Others.-—The employer identification number of a in" }, { "type": "NarrativeText", - "element_id": "138bc39ed883db62eb13920f3123df79", + "element_id": "52423926150736139986e51ee10d332d", "metadata": { "data_source": { "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/IRS-form-1987.pdf", @@ -597,7 +597,7 @@ ], "page_number": 1 }, - "text": "an applicant other than an individual should be entered in this block." + "text": "an applicant other than an individual should be entered this block. in" }, { "type": "Title", @@ -621,7 +621,7 @@ }, { "type": "NarrativeText", - "element_id": "dc1531183c8e3f45a78f110ec1efe15f", + "element_id": "79dfe9d9fc08cf5188d93240a83a76a2", "metadata": { "data_source": { "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/IRS-form-1987.pdf", @@ -637,7 +637,7 @@ ], "page_number": 1 }, - "text": "Individuals. —An individual desiring the change should sign the application. If the application pertains to a husband and wife filing a joint income tax return, the names of both should appear in the heading and both should sign." + "text": "Individuals. —An individual desiring the change should sign the application. If the application pertains to husband and wife filing a joint income tax return, the names of both should appear the heading and both should sign. a" }, { "type": "NarrativeText", @@ -661,7 +661,7 @@ }, { "type": "NarrativeText", - "element_id": "9de285e8e3b042aa9ac86edde98a21a9", + "element_id": "e5a5dd1b32ec6ed02e1f2b599bbc2059", "metadata": { "data_source": { "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/IRS-form-1987.pdf", @@ -677,7 +677,7 @@ ], "page_number": 1 }, - "text": "Corporations, cooperatives, and insurance companies.—The form should show the name of the corporation, cooperative, or insurance company and the signature of the president, vice president, treasurer, assistant treasurer, or chief accounting officer (such as tax officer) authorized to sign, and his or her official title. Receivers, trustees, or assignees must sign any application they are required to file. For a subsidiary corporation filing a consolidated return with its parent, the form should be signed by an officer of the parent corporation." + "text": "Corporations, cooperatives, and insurance companies.—The form should show the name of the corporation, cooperative, or insurance company and the signature of the president, vice president, treasurer, assistant treasurer, or chief accounting officer (such as tax officer) authorized to sign, and his or her official title. Receivers, trustees, or assignees must sign any application they are required to file. For subsidiary corporation filing a consolidated return with its parent, the form should be signed by an officer of the parent corporation. a" }, { "type": "NarrativeText", @@ -701,7 +701,7 @@ }, { "type": "NarrativeText", - "element_id": "52e2b8e4b8527ae448e9db2dfd0c43c7", + "element_id": "26cd16ba800280f9a0fd82b86c14017b", "metadata": { "data_source": { "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/IRS-form-1987.pdf", @@ -717,11 +717,11 @@ ], "page_number": 1 }, - "text": "Preparer other than partner, officer, etc.—The signature of the individual preparing the application should appear in the space provided on page 6." + "text": "Preparer other than partner, officer, signature of the individual preparing the application should appear the space provided on page 6." }, { "type": "NarrativeText", - "element_id": "1df7107903f249d938fbf3710f50283a", + "element_id": "f0bb11defd7a610f3fba2cace851c138", "metadata": { "data_source": { "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/IRS-form-1987.pdf", @@ -737,7 +737,7 @@ ], "page_number": 1 }, - "text": "If the individual or firm is also authorized to represent the applicant before the IRS, receive a copy of the requested ruling, or perform any other act(s), the power of attorney must reflect such authorization(s)." + "text": "is If the individual or firm also authorized to represent the applicant before the IRS, receive copy of the requested ruling, or perform any other act(s), the power of attorney must reflect such authorization(s). a" }, { "type": "Title", @@ -761,7 +761,7 @@ }, { "type": "NarrativeText", - "element_id": "58e977f2200b46ac8b372586dfd781bf", + "element_id": "8225c14fe71d62ade2b61d3ffd399c28", "metadata": { "data_source": { "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/IRS-form-1987.pdf", @@ -777,7 +777,7 @@ ], "page_number": 1 }, - "text": "Taxpayers that are members of an affiliated group filing a consolidated return that seeks to change to the same accounting method for more than one member of the group must file a separate Form 3115 for each such member," + "text": "Taxpayers that are members of an affiliated group filing consolidated return that seeks to change to the same accounting method for more than one member of the group must file a separate Form 3115 for each such member, a" }, { "type": "Title", @@ -801,7 +801,7 @@ }, { "type": "NarrativeText", - "element_id": "b57b7502430c59194bb865cfa1bcfab5", + "element_id": "0fb95b28729285a09f392180a1e448ff", "metadata": { "data_source": { "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/IRS-form-1987.pdf", @@ -817,11 +817,11 @@ ], "page_number": 1 }, - "text": "Item 5a, page 1.—“Taxable income or (loss) from operations” is to be entered before application of any net operating loss deduction under section 172(a)." + "text": "is Item 5a, page 1.—“Taxable income or (loss) from operations” to be entered before application of any net operating loss deduction under section 172(a)." }, { "type": "NarrativeText", - "element_id": "9eefeb9556d95a8dd563ff3270cae7f4", + "element_id": "9c0e16fefff9d44e19fb1ab33cec3714", "metadata": { "data_source": { "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/IRS-form-1987.pdf", @@ -837,11 +837,11 @@ ], "page_number": 1 }, - "text": "Item 6, page 2.—The term “gross receipts” includes total sales (net of returns and allowances) and all amounts received for services. In addition, gross receipts include any income from investments and from incidental or outside sources (e.g., interest, dividends, rents, royalties, and annuities). However, if you are a resaler of personal property, exclude from gross receipts any amounts not derived in the ordinary course of a trade or business. Gross receipts do not include amounts received for sales taxes if, under the applicable state or local law, the tax is legally imposed on the purchaser of the good or service, and the taxpayer merely collects and remits the tax to the taxing authority." + "text": "In is Item 6, page 2.—The term “gross receipts” includes total sales (net of returns and allowances) and all amounts received for services. addition, gross receipts include any income from investments and from incidental or outside sources (e.g., interest, dividends, rents, royalties, and However, if you resaler of personal property, exclude from gross receipts any amounts not derived the ordinary course of trade or business. Gross receipts do not include amounts received for sales taxes if, under the applicable state or local law, the tax legally imposed on the purchaser of the good or service, and the taxpayer merely collects and remits the tax to the taxing authority." }, { "type": "NarrativeText", - "element_id": "3e63f740940cd3ab94c17d2bbf48b13a", + "element_id": "d90ef7d23d6207ce7917fbecf5640e43", "metadata": { "data_source": { "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/IRS-form-1987.pdf", @@ -857,11 +857,11 @@ ], "page_number": 1 }, - "text": "Item 7b, page 2.—If item 7b 1s “Yes,” indicate on a separate sheet the following for each separate trade or business: Nature of business" + "text": "Item 7b, page 2.—If item 7b “Yes,” indicate separate sheet the following for each separate trade or business: Nature of business" }, { "type": "NarrativeText", - "element_id": "3db206c935841c3dcd5b3a1d41e56b84", + "element_id": "48c2d5f3a529a7b13c8d44c01aafc9bf", "metadata": { "data_source": { "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/IRS-form-1987.pdf", @@ -877,7 +877,7 @@ ], "page_number": 2 }, - "text": "(manufacturing, retailer, wholesaler, etc.), employer identification number, overall method of accounting, and whether, in the last 6 years, that business has changed its accounting method, or is also changing its accounting method as part of this request or as a separate request." + "text": "(manufacturing, retailer, wholesaler, etc.), employer identification number, overall method of accounting, and whether, in the last 6 years, that business has changed its accounting method, or also changing its accounting method as part of this request or as a separate request. is" }, { "type": "NarrativeText", @@ -981,7 +981,7 @@ }, { "type": "NarrativeText", - "element_id": "751abc8c6a0fa412c3e8c18345f57f95", + "element_id": "0d458b224c6d4835bc5ae0ac4f2effbd", "metadata": { "data_source": { "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/IRS-form-1987.pdf", @@ -997,7 +997,7 @@ ], "page_number": 2 }, - "text": "Item 13, page 2.—Insert the actual number of tax years. Use of the term “since inception” 1s not acceptable. However, “more than 6 years” Is acceptable." + "text": "Is Item 13, page 2.—Insert the actual number of tax years. Use of the term “since inception” not acceptable. However, “more than 6 years” 1s" }, { "type": "Title", @@ -1021,7 +1021,7 @@ }, { "type": "NarrativeText", - "element_id": "e4a695ea83818204438fe08add6d1554", + "element_id": "2d34ae6088764bc3d7d8e6721c4a0035", "metadata": { "data_source": { "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/IRS-form-1987.pdf", @@ -1037,7 +1037,7 @@ ], "page_number": 2 }, - "text": "Item 1b, page 2.—Include any amounts reported as income ina prior year although the income had not been accrued (earned) or received in the prior year; for example, discount on installment loans reported as income for the year in which the loans were made instead of for the year or years in which the income was received or earned. Advance payments under Rev. Proc. 71-21 or Regulations section 1.451-5 must be fully explained and all pertinent information must be submitted with this application." + "text": "Item 1b, page 2.—Include any amounts reported as income ina prior year although the income had not been accrued (earned) or received in the prior year; for example, discount on installment loans reported as income for the year which the loans were made instead of for the year or years in which the income was received or earned. Advance payments under Rev. Proc. 71-21 or Regulations section 1.451-5 must be fully explained and all pertinent information must be submitted with this application. in" }, { "type": "Title", @@ -1061,7 +1061,7 @@ }, { "type": "NarrativeText", - "element_id": "eac562ca19f6198691856c695e2790bd", + "element_id": "9979d47a98941079b7e2ed936947bcec", "metadata": { "data_source": { "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/IRS-form-1987.pdf", @@ -1077,7 +1077,7 @@ ], "page_number": 2 }, - "text": "Limitation on the Use of the Cash Method of Accounting. —Except as provided below, C corporations, partnerships with a C corporation as a partner, and tax shelters may not use the cash method of accounting. For purposes of this limitation, a trust subject to the tax on unrelated business income under section 511 1s treated as aC corporation with respect to its unrelated trade or business activities." + "text": "Limitation on the Use of the Cash Method of Accounting. —Except as provided below, C corporations, partnerships with a C corporation as a partner, and tax shelters may not use the cash method of accounting. For purposes of this limitation, a trust subject to the tax on unrelated business income under section 511 treated as aC corporation with respect to its unrelated trade or business activities. 1s" }, { "type": "NarrativeText", @@ -1101,7 +1101,7 @@ }, { "type": "NarrativeText", - "element_id": "69bd87b2ad5873c030748e62adf61b89", + "element_id": "660a726a3dc06508d224b4a056ee6f05", "metadata": { "data_source": { "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/IRS-form-1987.pdf", @@ -1117,11 +1117,11 @@ ], "page_number": 2 }, - "text": "(1) Farming businesses.—F or this purpose, the term “farming business” 1s defined in section 263A(e)(4), but it also includes the raising, harvesting, or growing of trees to which section 263A(c)(5) applies. Notwithstanding this exception, section 447 requires certain C corporations and partnerships with a C corporation as a partner to use the accrual method." + "text": "(1) Farming or this purpose, the term “farming business” defined section 263A(e)(4), but it also includes the raising, harvesting, or growing of trees to which section 263A(c)(5) applies. Notwithstanding this exception, section 447 requires certain C corporations and partnerships with a C corporation as a partner to use the accrual method. in" }, { "type": "NarrativeText", - "element_id": "df67e4b3a4a1352209c2648b87d675e2", + "element_id": "729a25dd291df1039b68e262128069f2", "metadata": { "data_source": { "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/IRS-form-1987.pdf", @@ -1137,7 +1137,7 @@ ], "page_number": 2 }, - "text": "(2) Qualified personal service corporations. — A “qualified personal service corporation” is any corporation: (a) substantially all of the activities of which involve the performance of services in the fields of health, law, engineering, architecture, accounting, actuarial science, performing arts, or consulting, and (b)" + "text": "(2) Qualified personal service corporations. — A “qualified personal service corporation” any corporation: (a) substantially all of the activities of which involve the performance of services the fields of health, law, engineering, architecture, accounting, actuarial science, performing arts, or consulting, and (b) is in" }, { "type": "Title", @@ -1161,7 +1161,7 @@ }, { "type": "NarrativeText", - "element_id": "44902073e7cc4fa753f25d40e009dcef", + "element_id": "d98c1e7f06d44a597e02c8ad2f13c952", "metadata": { "data_source": { "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/IRS-form-1987.pdf", @@ -1177,11 +1177,11 @@ ], "page_number": 2 }, - "text": "substantially all of the stock of which is owned by employees performing the services, retired employees who had performed the services, any estate of any individual who had performed the services listed above, or any person who acquired stock of the corporation as a result of the death of an employee or retiree described above if the acquisition occurred within 2 years of death." + "text": "substantially all of the stock of which owned by employees performing the services, retired employees who had performed the services, any estate of any individual who had performed the services listed above, or any person who acquired stock of the corporation as a result of the death of an employee or retiree described above if the acquisition occurred within 2 years of death. is" }, { "type": "NarrativeText", - "element_id": "b68a5b5b0d59122e0df42a96d68d2b5e", + "element_id": "e8b840139ed88ab7763dc6c4a93ec87c", "metadata": { "data_source": { "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/IRS-form-1987.pdf", @@ -1197,7 +1197,7 @@ ], "page_number": 2 }, - "text": "(3) Entities with gross receipts of $5,000,000 or less. —To qualify for this exception, the C corporation's or partnership’s annual average gross receipts for the three years ending with the prior tax year may not exceed $5,000,000. If the corporation or partnership was not in existence for the entire 3-year period, the period of existence is used to determine whether the corporation or partnership qualifies. If any tax year in the 3-year period is a short tax year, the corporation or partnership must annualize the gross receipts by multiplying the gross receipts by 12 and dividing the result by the number of months in the short period." + "text": "(3) Entities with gross receipts of $5,000,000 or less. —To qualify for this exception, the C corporation's or partnership’s annual average gross receipts for the three years ending with the prior tax year may not exceed $5,000,000. If the corporation or partnership was not existence for the entire 3-year period, the period of existence used to determine whether the corporation or partnership qualifies. If any tax year in the 3-year period is a short tax year, the corporation or partnership must annualize the gross receipts by multiplying the gross receipts by 12 and dividing the result by the number of months the short period. in is in" }, { "type": "NarrativeText", @@ -1241,7 +1241,7 @@ }, { "type": "NarrativeText", - "element_id": "a9e8c96063f3fea7ea05eb3cd41ebe7a", + "element_id": "b5839831e89ab26a76e6893767950fcb", "metadata": { "data_source": { "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/IRS-form-1987.pdf", @@ -1257,11 +1257,11 @@ ], "page_number": 2 }, - "text": "Applicants must give complete details about the present method of valuing inventory and the proposed method. State whether all or part of your inventory ts involved in the change." + "text": "Applicants must give complete details about the present method of valuing inventory and the proposed method. State whether all or part of your inventory ts involved the change. in" }, { "type": "NarrativeText", - "element_id": "7e90b155b5cdb2481b1dfbb1118142c5", + "element_id": "09f2d284fcf47c304439e8c522ffbc43", "metadata": { "data_source": { "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/IRS-form-1987.pdf", @@ -1277,7 +1277,7 @@ ], "page_number": 2 }, - "text": "Inventories of retail merchants.—The retail method of pricing inventories does not contemplate valuation of goods at the retail selling price. The retail selling price of goods on hand must be reduced to approximate cost or cost or market, whichever Is lower, by the adjustments required in Regulations section 1.471-8." + "text": "Is Inventories of retail retail method of pricing inventories does not contemplate valuation of goods at the retail selling price. The retail selling price of goods on hand must be reduced to approximate cost or cost or market, whichever lower, by the adjustments required Regulations section 1.471-8. in" }, { "type": "NarrativeText", @@ -1301,7 +1301,7 @@ }, { "type": "NarrativeText", - "element_id": "347f638641329c72c971a522ec07f6b1", + "element_id": "ddc7e4686a9eae9851ab800feb32cebb", "metadata": { "data_source": { "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/IRS-form-1987.pdf", @@ -1317,11 +1317,11 @@ ], "page_number": 2 }, - "text": "(1) The specific types and classes of goods in the LIFO inventories involved in the proposed changes and the comparative value of such Inventories as of the end of the tax year preceding the year of change determined by: (a) the LIFO method, and (b) the proposed method and basis (such as FIFO cost or lower of cost or market)." + "text": "(1) The specific types and classes of goods the LIFO inventories involved the proposed changes and the comparative value of such as of the end of the tax year preceding the year of change determined by: (a) the LIFO method, and (b) the proposed method and basis (such as FIFO cost or lower of cost or market). in in" }, { "type": "NarrativeText", - "element_id": "ea8e44805e010fa33ed03e9a8f22ddb2", + "element_id": "a5323a5758bf2434486b3da8d88f451e", "metadata": { "data_source": { "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/IRS-form-1987.pdf", @@ -1337,7 +1337,7 @@ ], "page_number": 2 }, - "text": "(2) State whether the proposed identification and valuation methods conform to the inventory method currently used with respect to non-LIFO Inventories, if any, or how such method is otherwise consistent with Regulations section 1.4726." + "text": "(2) State whether the proposed identification and valuation methods conform to the inventory method currently used with respect to non-LIFO if any, or how such method otherwise consistent with Regulations section is" }, { "type": "NarrativeText", @@ -1401,7 +1401,7 @@ }, { "type": "Title", - "element_id": "b6b99aba936d71652bca9a28d2a68eb8", + "element_id": "4bde94dc330268d2f63a09423409c6d4", "metadata": { "data_source": { "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/IRS-form-1987.pdf", @@ -1417,7 +1417,7 @@ ], "page_number": 2 }, - "text": "% U.S. Government Printing Office: 1987—201-993/60166" + "text": "U.S. Government Printing Office: 1987—201-993/60166" }, { "type": "Title", @@ -1441,7 +1441,7 @@ }, { "type": "NarrativeText", - "element_id": "86fab9f7b35d56a2d48baf0782b7c53d", + "element_id": "4f1e4558be502a24b0d3588c1bc0ebcd", "metadata": { "data_source": { "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/IRS-form-1987.pdf", @@ -1457,7 +1457,7 @@ ], "page_number": 2 }, - "text": "Section 460(f) provides that the term “long-term contract” means any contract for the manufacturing, building, installation, or construction of property that is not completed within the tax year in which it 1s entered into. However, a manufacturing contract will not qualify as a long-term contract unless the contract involves the manufacture of: (1) a unique item not normally included in your finished goods inventory, or (2) any item that normally requires more than 12 calendar months to complete." + "text": "Section 460(f) provides that the term “long-term contract” means any contract for the manufacturing, building, installation, or construction of property that not completed within the tax year which it is entered into. However, a manufacturing contract will not qualify as a long-term contract unless the contract involves the manufacture of: (1) a unique item not normally included your finished goods inventory, or (2) any item that normally requires more than 12 calendar months to complete. in in" }, { "type": "NarrativeText", @@ -1521,7 +1521,7 @@ }, { "type": "NarrativeText", - "element_id": "cf5e2bc86b7c77533924eb940fd522d5", + "element_id": "c26fe151eae5251dfee2f76dc2e4182b", "metadata": { "data_source": { "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/IRS-form-1987.pdf", @@ -1537,11 +1537,11 @@ ], "page_number": 2 }, - "text": "This section Is to be used only to request a change in a method of accounting for depreciation under section 167." + "text": "Is This section to be used only to request a change a method of accounting for depreciation under section 167. in" }, { "type": "NarrativeText", - "element_id": "b8355dc568ea042f9da586188b404bca", + "element_id": "1eece6c160dc638a671d39c88a49d5a7", "metadata": { "data_source": { "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/IRS-form-1987.pdf", @@ -1557,11 +1557,11 @@ ], "page_number": 2 }, - "text": "Rev. Proc. 74-11 provides a procedure whereby applicants are considered to have obtained the consent of the Commissioner to change their method of accounting for depreciation. You must file Form 3115 with the Service Center where your return will be filed within the first 180 days of the tax year in which it is desired to make the change. Attach a copy of the form to the income tax return for the tax year of the change." + "text": "is Rev. Proc. 74-11 provides a procedure whereby applicants are considered to have obtained the consent of the Commissioner to change their method of accounting for depreciation. You must file Form 3115 with the Service Center where your return will be filed within the first 180 days of the tax year which it desired to make the change. copy of the form to the income tax return for the tax year of the change. in a Attach" }, { "type": "NarrativeText", - "element_id": "319882ba6726e29222f5522c53887960", + "element_id": "435ac2b1c14ce8d7196dd93aa1389cc7", "metadata": { "data_source": { "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/IRS-form-1987.pdf", @@ -1577,7 +1577,7 @@ ], "page_number": 2 }, - "text": "Note: Do not use Form 3115 to make an election under section 168. Such an election may be made only on the tax return for the year in which the property 1s placed in service. In addition, Form 3115 is not to be used to request approval to revoke an election made under section 168. Such a request must be made in accordance with Rev. Proc. 87-1 (updated annually)." + "text": "In is Note: Do not use Form 3115 to make an election under section 168. Such an election may be made only on the tax return for the year which the property placed service. addition, Form 3115 not to be used to request approval to revoke an election made under section 168. Such request must be made in accordance with Rev. Proc. 87-1 (updated annually). in a" }, { "type": "Title", @@ -1601,7 +1601,7 @@ }, { "type": "NarrativeText", - "element_id": "cb1f664a186a87f6560cde136d70b558", + "element_id": "5e3a5b49a73f1b162202316da843198b", "metadata": { "data_source": { "url": "/home/runner/work/unstructured/unstructured/test_unstructured_ingest/download/azure/IRS-form-1987.pdf", @@ -1617,7 +1617,7 @@ ], "page_number": 2 }, - "text": "Generally, this section should be used for requesting changes In a method of accounting for which provision has not been made elsewhere on this form. Attach additional pages if more space ts needed for a full explanation of the present method used and the proposed change requested." + "text": "In Generally, this section should be used for requesting changes method of accounting for which provision has not been made elsewhere on this form. additional pages if more space ts needed for a full explanation of the present method used and the proposed change requested." }, { "type": "NarrativeText", diff --git a/test_unstructured_ingest/expected-structured-output/s3/small-pdf-set/2023-Jan-economic-outlook.pdf.json b/test_unstructured_ingest/expected-structured-output/s3/small-pdf-set/2023-Jan-economic-outlook.pdf.json index d6fefa2083..78ce704d77 100644 --- a/test_unstructured_ingest/expected-structured-output/s3/small-pdf-set/2023-Jan-economic-outlook.pdf.json +++ b/test_unstructured_ingest/expected-structured-output/s3/small-pdf-set/2023-Jan-economic-outlook.pdf.json @@ -631,7 +631,7 @@ }, { "type": "ListItem", - "element_id": "ab9d11a9dd37cfd5e1876f40777a4480", + "element_id": "57b9271999bf59e4bc0807fdffb4b865", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -645,7 +645,7 @@ "filetype": "application/pdf", "page_number": 4 }, - "text": "International Monetary Fund | January 2023 3" + "text": "International Monetary Fund | January 2023. 3" }, { "type": "Title", @@ -1909,7 +1909,7 @@ }, { "type": "ListItem", - "element_id": "cbb9553ae9412cc864f9f254b47c3efc", + "element_id": "c78d5a22c9065ddd7f6da9efa8353eac", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1923,11 +1923,11 @@ "filetype": "application/pdf", "page_number": 10 }, - "text": "International Monetary Fund | January 2023 9" + "text": "International Monetary Fund | January 2023. 9" }, { "type": "Image", - "element_id": "cd9e31727baaddee4567c7ef27c4937a", + "element_id": "02e937302c68fded4234aea81fe5b14b", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", @@ -1941,7 +1941,7 @@ "filetype": "application/pdf", "page_number": 11 }, - "text": "BOX 1. GL AL FINANCIAL STABILITY UPDATE" + "text": "JAN 2023 BOX 1. GLOBAL FINANCIAL STABILITY UPDATE" }, { "type": "NarrativeText", diff --git a/test_unstructured_ingest/expected-structured-output/s3/small-pdf-set/Silent-Giant-(1).pdf.json b/test_unstructured_ingest/expected-structured-output/s3/small-pdf-set/Silent-Giant-(1).pdf.json index 0f33bf5021..c7e738b35a 100644 --- a/test_unstructured_ingest/expected-structured-output/s3/small-pdf-set/Silent-Giant-(1).pdf.json +++ b/test_unstructured_ingest/expected-structured-output/s3/small-pdf-set/Silent-Giant-(1).pdf.json @@ -55,7 +55,7 @@ }, { "type": "Title", - "element_id": "14547603bad3329c14c74b8c4e2ff8d9", + "element_id": "3ad74247d9203b064430c509bfe0ebe0", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -69,7 +69,7 @@ "filetype": "application/pdf", "page_number": 1 }, - "text": "//s88ciation" + "text": "/[s#8Peinron" }, { "type": "Title", @@ -1152,8 +1152,8 @@ "text": "e" }, { - "type": "Title", - "element_id": "f83714d89302473e0e4f5399bd50e7a9", + "type": "UncategorizedText", + "element_id": "380918b946a526640a40df5dced65167", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -1167,11 +1167,11 @@ "filetype": "application/pdf", "page_number": 8 }, - "text": "W T" + "text": "=" }, { - "type": "UncategorizedText", - "element_id": "380918b946a526640a40df5dced65167", + "type": "Title", + "element_id": "f83714d89302473e0e4f5399bd50e7a9", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -1185,7 +1185,7 @@ "filetype": "application/pdf", "page_number": 8 }, - "text": "=" + "text": "W T" }, { "type": "NarrativeText", @@ -1745,24 +1745,6 @@ }, "text": "iv" }, - { - "type": "Title", - "element_id": "d3fc2842ddfad4c8d3859f84d4439bfd", - "metadata": { - "data_source": { - "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", - "version": "177372694731575984083482917563244941766", - "record_locator": { - "protocol": "s3", - "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" - }, - "date_modified": "2023-02-12T10:10:36" - }, - "filetype": "application/pdf", - "page_number": 10 - }, - "text": "Vv" - }, { "type": "Title", "element_id": "4c94485e0c21ae6c41ce1dfe7b6bface", @@ -1801,7 +1783,7 @@ }, { "type": "Title", - "element_id": "c0ff93ea8927a7366db0331e5fd9d19f", + "element_id": "c1d2906220d1eef1b17422b7132872a8", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -1815,11 +1797,11 @@ "filetype": "application/pdf", "page_number": 10 }, - "text": "vi" + "text": "vii" }, { - "type": "Title", - "element_id": "c1d2906220d1eef1b17422b7132872a8", + "type": "UncategorizedText", + "element_id": "d2e2adf7177b7a8afddbc12d1634cf23", "metadata": { "data_source": { "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", @@ -1833,7 +1815,7 @@ "filetype": "application/pdf", "page_number": 10 }, - "text": "vii" + "text": "_" }, { "type": "NarrativeText", @@ -1961,6 +1943,24 @@ }, "text": "World Nuclear Association Tower House 10 Southampton Street London WC2E 7HA United Kingdom" }, + { + "type": "UncategorizedText", + "element_id": "380918b946a526640a40df5dced65167", + "metadata": { + "data_source": { + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf", + "version": "177372694731575984083482917563244941766", + "record_locator": { + "protocol": "s3", + "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" + }, + "date_modified": "2023-02-12T10:10:36" + }, + "filetype": "application/pdf", + "page_number": 12 + }, + "text": "=" + }, { "type": "NarrativeText", "element_id": "8ff63a0f4af4de37eff90952d575f76d", diff --git a/test_unstructured_ingest/expected-structured-output/s3/small-pdf-set/recalibrating-risk-report.pdf.json b/test_unstructured_ingest/expected-structured-output/s3/small-pdf-set/recalibrating-risk-report.pdf.json index acd8c3b2b0..acc28e6c84 100644 --- a/test_unstructured_ingest/expected-structured-output/s3/small-pdf-set/recalibrating-risk-report.pdf.json +++ b/test_unstructured_ingest/expected-structured-output/s3/small-pdf-set/recalibrating-risk-report.pdf.json @@ -1457,6 +1457,24 @@ }, "text": "World Nuclear Association Tower House 10 Southampton Street London WC2E 7HA United Kingdom" }, + { + "type": "UncategorizedText", + "element_id": "380918b946a526640a40df5dced65167", + "metadata": { + "data_source": { + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "306475068461766865312866697521104206816", + "record_locator": { + "protocol": "s3", + "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + }, + "date_modified": "2023-02-12T10:09:32" + }, + "filetype": "application/pdf", + "page_number": 12 + }, + "text": "=" + }, { "type": "NarrativeText", "element_id": "8ff63a0f4af4de37eff90952d575f76d", diff --git a/test_unstructured_ingest/metrics/aggregate-scores-cct.tsv b/test_unstructured_ingest/metrics/aggregate-scores-cct.tsv index e362972a46..22f9a6a2c6 100644 --- a/test_unstructured_ingest/metrics/aggregate-scores-cct.tsv +++ b/test_unstructured_ingest/metrics/aggregate-scores-cct.tsv @@ -1,3 +1,3 @@ strategy average sample_sd population_sd count cct-accuracy 0.798 0.083 0.072 4 -cct-%missing 0.089 0.04 0.035 4 +cct-%missing 0.089 0.04 0.034 4 diff --git a/test_unstructured_ingest/metrics/all-docs-cct.tsv b/test_unstructured_ingest/metrics/all-docs-cct.tsv index 69ffeaaab6..d8d93d098e 100644 --- a/test_unstructured_ingest/metrics/all-docs-cct.tsv +++ b/test_unstructured_ingest/metrics/all-docs-cct.tsv @@ -2,4 +2,4 @@ filename doctype connector cct-accuracy cct-%missing science-exploration-1p.pptx pptx dropbox 0.861 0.093 science-exploration-1p.pptx pptx box 0.861 0.093 example-10k.html html local 0.686 0.037 -IRS-form-1987.pdf pdf azure 0.783 0.135 +IRS-form-1987.pdf pdf azure 0.783 0.134