From 9f7ff4fd98b475ea9726dc96e7cffbfb380f4050 Mon Sep 17 00:00:00 2001
From: Christine Straub <christinemstraub@gmail.com>
Date: Fri, 3 Nov 2023 08:02:43 -0700
Subject: [PATCH] rfctr: Clean up test functions in `test_pdf.py` (#1999)

### Summary:
- Replace hard-coded `example-docs/...` paths with the test utility
  function `example_doc_path()`
- Consolidate the separate test functions related to `metadata_date` and
  `exclude_metadata` into parametrized tests
---
 .../partition/pdf_image/test_pdf.py           | 309 ++++++------------
 1 file changed, 91 insertions(+), 218 deletions(-)

diff --git a/test_unstructured/partition/pdf_image/test_pdf.py b/test_unstructured/partition/pdf_image/test_pdf.py
index 176a4bad92..7b5babda32 100644
--- a/test_unstructured/partition/pdf_image/test_pdf.py
+++ b/test_unstructured/partition/pdf_image/test_pdf.py
@@ -90,7 +90,7 @@ def pages(self):
 @pytest.mark.parametrize(
     ("filename", "file"),
     [
-        ("example-docs/layout-parser-paper-fast.pdf", None),
+        (example_doc_path("layout-parser-paper-fast.pdf"), None),
         (None, b"0000"),
     ],
 )
@@ -141,7 +141,7 @@ def test_partition_pdf(
     strategy,
     expected,
     origin,
-    filename="example-docs/layout-parser-paper-with-empty-pages.pdf",
+    filename=example_doc_path("layout-parser-paper-with-empty-pages.pdf"),
 ):
     # Test that the partition_pdf function can handle filename
     def _test(result):
@@ -171,7 +171,7 @@ def _test(result):
 @mock.patch.dict(os.environ, {"UNSTRUCTURED_HI_RES_MODEL_NAME": "checkbox"})
 def test_partition_pdf_with_model_name_env_var(
     monkeypatch,
-    filename="example-docs/layout-parser-paper-fast.pdf",
+    filename=example_doc_path("layout-parser-paper-fast.pdf"),
 ):
     monkeypatch.setattr(pdf, "extractable_elements", lambda *args, **kwargs: [])
     with mock.patch.object(
@@ -185,7 +185,7 @@ def test_partition_pdf_with_model_name_env_var(
 
 def test_partition_pdf_with_model_name(
     monkeypatch,
-    filename="example-docs/layout-parser-paper-fast.pdf",
+    filename=example_doc_path("layout-parser-paper-fast.pdf"),
 ):
     monkeypatch.setattr(pdf, "extractable_elements", lambda *args, **kwargs: [])
     with mock.patch.object(
@@ -198,31 +198,31 @@ def test_partition_pdf_with_model_name(
 
 
 def test_partition_pdf_with_auto_strategy(
-    filename="example-docs/layout-parser-paper-fast.pdf",
+    filename=example_doc_path("layout-parser-paper-fast.pdf"),
 ):
     elements = pdf.partition_pdf(filename=filename, strategy="auto")
     title = "LayoutParser: A Uniﬁed Toolkit for Deep Learning Based Document Image Analysis"
     assert elements[6].text == title
     assert elements[6].metadata.filename == "layout-parser-paper-fast.pdf"
-    assert elements[6].metadata.file_directory == "example-docs"
+    assert elements[6].metadata.file_directory == os.path.dirname(filename)
 
 
 def test_partition_pdf_with_page_breaks(
-    filename="example-docs/layout-parser-paper-fast.pdf",
+    filename=example_doc_path("layout-parser-paper-fast.pdf"),
 ):
     elements = pdf.partition_pdf(filename=filename, url=None, include_page_breaks=True)
     assert "PageBreak" in [elem.category for elem in elements]
 
 
 def test_partition_pdf_with_no_page_breaks(
-    filename="example-docs/layout-parser-paper-fast.pdf",
+    filename=example_doc_path("layout-parser-paper-fast.pdf"),
 ):
     elements = pdf.partition_pdf(filename=filename, url=None)
     assert "PageBreak" not in [elem.category for elem in elements]
 
 
 def test_partition_pdf_with_fast_strategy(
-    filename="example-docs/layout-parser-paper-fast.pdf",
+    filename=example_doc_path("layout-parser-paper-fast.pdf"),
 ):
     elements = pdf.partition_pdf(filename=filename, url=None, strategy="fast")
     assert len(elements) > 10
@@ -233,7 +233,7 @@ def test_partition_pdf_with_fast_strategy(
 
 
 def test_partition_pdf_with_fast_neg_coordinates():
-    filename = "example-docs/negative-coords.pdf"
+    filename = example_doc_path("negative-coords.pdf")
     elements = pdf.partition_pdf(filename=filename, url=None, strategy="fast")
     assert len(elements) == 5
     assert elements[0].metadata.coordinates.points[0][0] < 0
@@ -241,7 +241,7 @@ def test_partition_pdf_with_fast_neg_coordinates():
 
 
 def test_partition_pdf_with_fast_groups_text(
-    filename="example-docs/layout-parser-paper-fast.pdf",
+    filename=example_doc_path("layout-parser-paper-fast.pdf"),
 ):
     elements = pdf.partition_pdf(filename=filename, url=None, strategy="fast")
 
@@ -257,7 +257,7 @@ def test_partition_pdf_with_fast_groups_text(
 
 
 def test_partition_pdf_with_fast_strategy_from_file(
-    filename="example-docs/layout-parser-paper-fast.pdf",
+    filename=example_doc_path("layout-parser-paper-fast.pdf"),
 ):
     with open(filename, "rb") as f:
         elements = pdf.partition_pdf(file=f, url=None, strategy="fast")
@@ -266,7 +266,7 @@ def test_partition_pdf_with_fast_strategy_from_file(
 
 def test_partition_pdf_with_fast_strategy_and_page_breaks(
     caplog,
-    filename="example-docs/layout-parser-paper-fast.pdf",
+    filename=example_doc_path("layout-parser-paper-fast.pdf"),
 ):
     elements = pdf.partition_pdf(
         filename=filename,
@@ -283,7 +283,7 @@ def test_partition_pdf_with_fast_strategy_and_page_breaks(
 
 
 def test_partition_pdf_raises_with_bad_strategy(
-    filename="example-docs/layout-parser-paper-fast.pdf",
+    filename=example_doc_path("layout-parser-paper-fast.pdf"),
 ):
     with pytest.raises(ValueError):
         pdf.partition_pdf(filename=filename, url=None, strategy="made_up")
@@ -292,7 +292,7 @@ def test_partition_pdf_raises_with_bad_strategy(
 def test_partition_pdf_falls_back_to_fast(
     monkeypatch,
     caplog,
-    filename="example-docs/layout-parser-paper-fast.pdf",
+    filename=example_doc_path("layout-parser-paper-fast.pdf"),
 ):
     def mock_exists(dep):
         return dep not in ["unstructured_inference", "pytesseract"]
@@ -314,7 +314,7 @@ def mock_exists(dep):
 def test_partition_pdf_falls_back_to_fast_from_ocr_only(
     monkeypatch,
     caplog,
-    filename="example-docs/layout-parser-paper-fast.pdf",
+    filename=example_doc_path("layout-parser-paper-fast.pdf"),
 ):
     def mock_exists(dep):
         return dep not in ["pytesseract"]
@@ -340,7 +340,7 @@ def mock_exists(dep):
 def test_partition_pdf_falls_back_to_hi_res_from_ocr_only(
     monkeypatch,
     caplog,
-    filename="example-docs/layout-parser-paper-fast.pdf",
+    filename=example_doc_path("layout-parser-paper-fast.pdf"),
 ):
     def mock_exists(dep):
         return dep not in ["pytesseract"]
@@ -363,7 +363,7 @@ def mock_exists(dep):
 def test_partition_pdf_falls_back_to_ocr_only(
     monkeypatch,
     caplog,
-    filename="example-docs/layout-parser-paper-fast.pdf",
+    filename=example_doc_path("layout-parser-paper-fast.pdf"),
 ):
     def mock_exists(dep):
         return dep not in ["unstructured_inference"]
@@ -383,7 +383,7 @@ def mock_exists(dep):
 
 
 def test_partition_pdf_uses_table_extraction():
-    filename = "example-docs/layout-parser-paper-fast.pdf"
+    filename = example_doc_path("layout-parser-paper-fast.pdf")
     with mock.patch(
         "unstructured.partition.ocr.process_file_with_ocr",
     ) as mock_process_file_with_model:
@@ -399,7 +399,7 @@ def test_partition_pdf_uses_table_extraction():
     ],
 )
 def test_partition_pdf_hi_table_extraction_with_languages(ocr_mode):
-    filename = "example-docs/korean-text-with-tables.pdf"
+    filename = example_doc_path("korean-text-with-tables.pdf")
     elements = pdf.partition_pdf(
         filename=filename,
         ocr_mode=ocr_mode,
@@ -423,7 +423,7 @@ def test_partition_pdf_hi_table_extraction_with_languages(ocr_mode):
     ],
 )
 def test_partition_pdf_hi_res_ocr_mode_with_table_extraction(ocr_mode):
-    filename = "example-docs/layout-parser-paper.pdf"
+    filename = example_doc_path("layout-parser-paper.pdf")
     elements = pdf.partition_pdf(
         filename=filename,
         ocr_mode=ocr_mode,
@@ -456,7 +456,7 @@ def test_partition_pdf_with_dpi():
         assert mock_process.call_args[1]["pdf_image_dpi"] == 100
 
 
-def test_partition_pdf_requiring_recursive_text_grab(filename="example-docs/reliance.pdf"):
+def test_partition_pdf_requiring_recursive_text_grab(filename=example_doc_path("reliance.pdf")):
     elements = pdf.partition_pdf(filename=filename, strategy="fast")
     assert len(elements) > 50
     assert elements[0].metadata.page_number == 1
@@ -474,7 +474,7 @@ def test_partition_pdf_with_copy_protection_fallback_to_hi_res(caplog):
 
 def test_partition_pdf_fails_if_pdf_not_processable(
     monkeypatch,
-    filename="example-docs/layout-parser-paper-fast.pdf",
+    filename=example_doc_path("layout-parser-paper-fast.pdf"),
 ):
     def mock_exists(dep):
         return dep not in ["unstructured_inference", "pytesseract"]
@@ -527,7 +527,7 @@ def test_partition_pdf_fast_groups_text_in_text_box():
 
 
 def test_partition_pdf_with_metadata_filename(
-    filename="example-docs/layout-parser-paper-fast.pdf",
+    filename=example_doc_path("layout-parser-paper-fast.pdf"),
 ):
     elements = pdf.partition_pdf(
         filename=filename,
@@ -540,7 +540,7 @@ def test_partition_pdf_with_metadata_filename(
 
 
 def test_partition_pdf_with_fast_strategy_from_file_with_metadata_filename(
-    filename="example-docs/layout-parser-paper-fast.pdf",
+    filename=example_doc_path("layout-parser-paper-fast.pdf"),
 ):
     with open(filename, "rb") as f:
         elements = pdf.partition_pdf(
@@ -553,186 +553,77 @@ def test_partition_pdf_with_fast_strategy_from_file_with_metadata_filename(
         assert element.metadata.filename == "test"
 
 
-def test_partition_pdf_with_auto_strategy_exclude_metadata(
-    filename="example-docs/layout-parser-paper-fast.pdf",
-):
-    elements = pdf.partition_pdf(
-        filename=filename,
-        strategy="auto",
-        include_metadata=False,
-    )
-    title = "LayoutParser: A Uniﬁed Toolkit for Deep Learning Based Document Image Analysis"
-    assert elements[6].text == title
-    for i in range(len(elements)):
-        assert elements[i].metadata.to_dict() == {}
-
-
-def test_partition_pdf_with_fast_strategy_from_file_exclude_metadata(
-    filename="example-docs/layout-parser-paper-fast.pdf",
+@pytest.mark.parametrize("file_mode", ["filename", "rb"])
+@pytest.mark.parametrize("strategy", ["auto", "hi_res", "fast", "ocr_only"])
+def test_partition_pdf_exclude_metadata(
+    file_mode,
+    strategy,
+    filename=example_doc_path("layout-parser-paper-fast.pdf"),
 ):
-    with open(filename, "rb") as f:
+    if file_mode == "filename":
         elements = pdf.partition_pdf(
-            file=f,
-            url=None,
-            strategy="fast",
+            filename=filename,
+            strategy=strategy,
             include_metadata=False,
         )
+    else:
+        with open(filename, "rb") as f:
+            elements = pdf.partition_pdf(
+                file=f,
+                url=None,
+                strategy=strategy,
+                include_metadata=False,
+            )
+
     for i in range(len(elements)):
         assert elements[i].metadata.to_dict() == {}
 
 
-def test_partition_pdf_with_auto_strategy_metadata_date(
-    mocker,
-    filename="example-docs/copy-protected.pdf",
-):
-    mocked_last_modification_date = "2029-07-05T09:24:28"
-
-    mocker.patch(
-        "unstructured.partition.pdf.get_last_modified_date",
-        return_value=mocked_last_modification_date,
-    )
-
-    elements = pdf.partition_pdf(
-        filename=filename,
-    )
-
-    assert elements[0].metadata.last_modified == mocked_last_modification_date
-
-
-def test_partition_pdf_with_auto_strategy_custom_metadata_date(
-    mocker,
-    filename="example-docs/copy-protected.pdf",
-):
-    mocked_last_modification_date = "2029-07-05T09:24:28"
-    expected_last_modification_date = "2020-07-05T09:24:28"
-
-    mocker.patch(
-        "unstructured.partition.pdf.get_last_modified_date",
-        return_value=mocked_last_modification_date,
-    )
-
-    elements = pdf.partition_pdf(
-        filename=filename,
-        metadata_last_modified=expected_last_modification_date,
-    )
-
-    assert elements[0].metadata.last_modified == expected_last_modification_date
-
-
-def test_partition_pdf_with_hi_res_strategy_metadata_date(
-    mocker,
-    filename="example-docs/copy-protected.pdf",
-):
-    mocked_last_modification_date = "2029-07-05T09:24:28"
-
-    mocker.patch(
-        "unstructured.partition.pdf.get_last_modified_date",
-        return_value=mocked_last_modification_date,
-    )
-
-    elements = pdf.partition_pdf(filename=filename, strategy="hi_res")
-
-    assert elements[0].metadata.last_modified == mocked_last_modification_date
-
-
-def test_partition_pdf_with_hi_res_strategy_custom_metadata_date(
-    mocker,
-    filename="example-docs/copy-protected.pdf",
-):
-    mocked_last_modification_date = "2029-07-05T09:24:28"
-    expected_last_modification_date = "2020-07-05T09:24:28"
-
-    mocker.patch(
-        "unstructured.partition.pdf.get_last_modified_date",
-        return_value=mocked_last_modification_date,
-    )
-
-    elements = pdf.partition_pdf(
-        filename=filename,
-        metadata_last_modified=expected_last_modification_date,
-        strategy="hi_res",
-    )
-
-    assert elements[0].metadata.last_modified == expected_last_modification_date
-
-
-def test_partition_pdf_from_file_with_auto_strategy_metadata_date(
-    mocker,
-    filename="example-docs/copy-protected.pdf",
-):
-    mocked_last_modification_date = "2029-07-05T09:24:28"
-
-    mocker.patch(
-        "unstructured.partition.pdf.get_last_modified_date_from_file",
-        return_value=mocked_last_modification_date,
-    )
-
-    with open(filename, "rb") as f:
-        elements = pdf.partition_pdf(
-            file=f,
-        )
-
-    assert elements[0].metadata.last_modified == mocked_last_modification_date
-
-
-def test_partition_pdf_from_file_with_auto_strategy_custom_metadata_date(
-    mocker,
-    filename="example-docs/copy-protected.pdf",
-):
-    mocked_last_modification_date = "2029-07-05T09:24:28"
-    expected_last_modification_date = "2020-07-05T09:24:28"
-
-    mocker.patch(
-        "unstructured.partition.pdf.get_last_modified_date_from_file",
-        return_value=mocked_last_modification_date,
-    )
-
-    with open(filename, "rb") as f:
-        elements = pdf.partition_pdf(
-            file=f,
-            metadata_last_modified=expected_last_modification_date,
-        )
-
-    assert elements[0].metadata.last_modified == expected_last_modification_date
-
-
-def test_partition_pdf_from_file_with_hi_res_strategy_metadata_date(
+@pytest.mark.parametrize("file_mode", ["filename", "rb", "spool"])
+@pytest.mark.parametrize("strategy", ["auto", "hi_res", "fast", "ocr_only"])
+@pytest.mark.parametrize("last_modification_date", [None, "2020-07-05T09:24:28"])
+def test_partition_pdf_metadata_date(
     mocker,
-    filename="example-docs/copy-protected.pdf",
+    file_mode,
+    strategy,
+    last_modification_date,
+    filename=example_doc_path("copy-protected.pdf"),
 ):
     mocked_last_modification_date = "2029-07-05T09:24:28"
-
-    mocker.patch(
-        "unstructured.partition.pdf.get_last_modified_date_from_file",
-        return_value=mocked_last_modification_date,
+    expected_last_modification_date = (
+        last_modification_date if last_modification_date else mocked_last_modification_date
     )
 
-    with open(filename, "rb") as f:
-        elements = pdf.partition_pdf(file=f, strategy="hi_res")
-
-    assert elements[0].metadata.last_modified == mocked_last_modification_date
-
-
-def test_partition_pdf_from_file_with_hi_res_strategy_custom_metadata_date(
-    mocker,
-    filename="example-docs/copy-protected.pdf",
-):
-    mocked_last_modification_date = "2029-07-05T09:24:28"
-    expected_last_modification_date = "2020-07-05T09:24:28"
-
     mocker.patch(
-        "unstructured.partition.pdf.get_last_modified_date_from_file",
+        "unstructured.partition.pdf.get_the_last_modification_date_pdf_or_img",
         return_value=mocked_last_modification_date,
     )
 
-    with open(filename, "rb") as f:
+    if file_mode == "filename":
         elements = pdf.partition_pdf(
-            file=f,
-            metadata_last_modified=expected_last_modification_date,
-            strategy="hi_res",
+            filename=filename,
+            strategy=strategy,
+            metadata_last_modified=last_modification_date,
         )
+    elif file_mode == "rb":
+        with open(filename, "rb") as f:
+            elements = pdf.partition_pdf(
+                file=f,
+                strategy=strategy,
+                metadata_last_modified=last_modification_date,
+            )
+    else:
+        with open(filename, "rb") as test_file:
+            spooled_temp_file = SpooledTemporaryFile()
+            spooled_temp_file.write(test_file.read())
+            spooled_temp_file.seek(0)
+            elements = pdf.partition_pdf(
+                file=spooled_temp_file,
+                strategy=strategy,
+                metadata_last_modified=last_modification_date,
+            )
 
-    assert elements[0].metadata.last_modified == expected_last_modification_date
+    assert {el.metadata.last_modified for el in elements} == {expected_last_modification_date}
 
 
 @pytest.mark.parametrize("strategy", ["fast", "hi_res"])
@@ -745,7 +636,7 @@ def test_partition_pdf_with_json(strategy: str):
 
 
 def test_add_chunking_strategy_by_title_on_partition_pdf(
-    filename="example-docs/layout-parser-paper-fast.pdf",
+    filename=example_doc_path("layout-parser-paper-fast.pdf"),
 ):
     elements = pdf.partition_pdf(filename=filename)
     chunk_elements = pdf.partition_pdf(filename, chunking_strategy="by_title")
@@ -755,20 +646,20 @@ def test_add_chunking_strategy_by_title_on_partition_pdf(
 
 
 def test_partition_pdf_formats_languages_for_tesseract():
-    filename = "example-docs/DA-1p.pdf"
+    filename = example_doc_path("DA-1p.pdf")
     with mock.patch.object(ocr, "process_file_with_ocr", mock.MagicMock()) as mock_process:
         pdf.partition_pdf(filename=filename, strategy="hi_res", languages=["en"])
         assert mock_process.call_args[1]["ocr_languages"] == "eng"
 
 
 def test_partition_pdf_warns_with_ocr_languages(caplog):
-    filename = "example-docs/chevron-page.pdf"
+    filename = example_doc_path("chevron-page.pdf")
     pdf.partition_pdf(filename=filename, strategy="hi_res", ocr_languages="eng")
     assert "The ocr_languages kwarg will be deprecated" in caplog.text
 
 
 def test_partition_pdf_or_image_warns_with_ocr_languages(caplog):
-    filename = "example-docs/DA-1p.pdf"
+    filename = example_doc_path("DA-1p.pdf")
     pdf.partition_pdf_or_image(filename=filename, strategy="hi_res", ocr_languages="eng")
     assert "The ocr_languages kwarg will be deprecated" in caplog.text
 
@@ -777,7 +668,7 @@ def test_partition_categorization_backup():
     text = "This is Clearly a Title"
     with mock.patch.object(pdf, "_partition_pdf_or_image_local", return_value=[Text(text)]):
         elements = pdf.partition_pdf_or_image(
-            "example-docs/layout-parser-paper-fast.pdf",
+            example_doc_path("layout-parser-paper-fast.pdf"),
             strategy="hi_res",
         )
         # Should have changed the element class from Text to Title
@@ -787,7 +678,7 @@ def test_partition_categorization_backup():
 
 @pytest.mark.parametrize(
     "filename",
-    ["example-docs/layout-parser-paper-fast.pdf"],
+    [example_doc_path("layout-parser-paper-fast.pdf")],
 )
 def test_combine_numbered_list(filename):
     elements = pdf.partition_pdf(filename=filename, strategy="auto")
@@ -805,7 +696,7 @@ def test_combine_numbered_list(filename):
 
 @pytest.mark.parametrize(
     "filename",
-    ["example-docs/layout-parser-paper-fast.pdf"],
+    [example_doc_path("layout-parser-paper-fast.pdf")],
 )
 def test_partition_pdf_hyperlinks(filename):
     elements = pdf.partition_pdf(filename=filename, strategy="auto")
@@ -831,7 +722,7 @@ def test_partition_pdf_hyperlinks(filename):
 
 @pytest.mark.parametrize(
     "filename",
-    ["example-docs/embedded-link.pdf"],
+    [example_doc_path("embedded-link.pdf")],
 )
 def test_partition_pdf_hyperlinks_multiple_lines(filename):
     elements = pdf.partition_pdf(filename=filename, strategy="auto")
@@ -845,7 +736,7 @@ def test_partition_pdf_uses_model_name():
         "_partition_pdf_or_image_local",
     ) as mockpartition:
         pdf.partition_pdf(
-            "example-docs/layout-parser-paper-fast.pdf",
+            example_doc_path("layout-parser-paper-fast.pdf"),
             model_name="test",
             strategy="hi_res",
         )
@@ -856,7 +747,7 @@ def test_partition_pdf_uses_model_name():
 
 
 def test_partition_pdf_word_bbox_not_char(
-    filename="example-docs/interface-config-guide-p93.pdf",
+    filename=example_doc_path("interface-config-guide-p93.pdf"),
 ):
     try:
         elements = pdf.partition_pdf(filename=filename)
@@ -866,7 +757,7 @@ def test_partition_pdf_word_bbox_not_char(
 
 
 def test_partition_pdf_raises_TypeError_for_invalid_languages():
-    filename = "example-docs/chevron-page.pdf"
+    filename = example_doc_path("chevron-page.pdf")
     with pytest.raises(TypeError):
         pdf.partition_pdf(filename=filename, strategy="hi_res", languages="eng")
 
@@ -908,7 +799,7 @@ def test_default_hi_res_model(infer_table_structure, env, expected, monkeypatch)
 
 
 def test_partition_model_name_default_to_None():
-    filename = "example-docs/DA-1p.pdf"
+    filename = example_doc_path("DA-1p.pdf")
     try:
         pdf.partition_pdf(
             filename=filename,
@@ -946,7 +837,7 @@ class CallException(Exception):
     # Patch the ocr function with the mock that will record the call and then terminate
     with mock.patch(ocr_func, mock_ocr_func), pytest.raises(CallException):
         pdf.partition_pdf(
-            "example-docs/layout-parser-paper-fast.pdf",
+            example_doc_path("layout-parser-paper-fast.pdf"),
             strategy=strategy,
             ocr_languages="kor",
         )
@@ -1005,34 +896,20 @@ def test_get_uris_from_annots_string_annotation(
 @pytest.mark.parametrize(
     ("filename", "is_image"),
     [
-        ("example-docs/layout-parser-paper-fast.pdf", False),
-        ("example-docs/layout-parser-paper-fast.jpg", True),
+        (example_doc_path("layout-parser-paper-fast.pdf"), False),
+        (example_doc_path("layout-parser-paper-fast.jpg"), True),
     ],
 )
-@pytest.mark.parametrize("last_modification_date", [None, "2020-07-05T09:24:28"])
 def test_partition_pdf_with_ocr_only_strategy(
-    mocker,
     file_mode,
     filename,
     is_image,
-    last_modification_date,
 ):
-    mocked_last_modification_date = "2029-07-05T09:24:28"
-    expected_last_modification_date = (
-        last_modification_date if last_modification_date else mocked_last_modification_date
-    )
-
-    mocker.patch(
-        "unstructured.partition.pdf.get_the_last_modification_date_pdf_or_img",
-        return_value=mocked_last_modification_date,
-    )
-
     if file_mode == "filename":
         elements = pdf.partition_pdf(
             filename=filename,
             strategy="ocr_only",
             languages=["eng"],
-            metadata_last_modified=last_modification_date,
             is_image=is_image,
         )
     elif file_mode == "rb":
@@ -1041,7 +918,6 @@ def test_partition_pdf_with_ocr_only_strategy(
                 file=f,
                 strategy="ocr_only",
                 languages=["eng"],
-                metadata_last_modified=last_modification_date,
                 is_image=is_image,
             )
     else:
@@ -1053,13 +929,10 @@ def test_partition_pdf_with_ocr_only_strategy(
                 file=spooled_temp_file,
                 strategy="ocr_only",
                 languages=["eng"],
-                metadata_last_modified=last_modification_date,
                 is_image=is_image,
             )
 
     assert elements[0].metadata.languages == ["eng"]
-    assert {el.metadata.last_modified for el in elements} == {expected_last_modification_date}
-
     # check pages
     if is_image:
         assert {el.metadata.page_number for el in elements} == {1}