From 90072b77d4ff728e3a029c2b63f118f952c3316e Mon Sep 17 00:00:00 2001 From: Alan Bertl Date: Fri, 20 Oct 2023 02:45:25 -0500 Subject: [PATCH] Suppress kwargs when not used --- CHANGELOG.md | 2 +- unstructured_inference/inference/layout.py | 22 ---------------------- 2 files changed, 1 insertion(+), 23 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 24b260f1..36065d0e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ ## 0.7.10-dev2 -* Handle kwargs related to pdf processing in signature +* Handle kwargs explicitly when needed, suppress otherwise * fix: Reduce Chipper memory consumption on x86_64 cpus * fix: Skips ordering elements coming from Chipper * fix: After refactoring to introduce Chipper, annotate() weren't able to show text with extra info from elements, this is fixed now. diff --git a/unstructured_inference/inference/layout.py b/unstructured_inference/inference/layout.py index ff2c1918..e8303e66 100644 --- a/unstructured_inference/inference/layout.py +++ b/unstructured_inference/inference/layout.py @@ -71,13 +71,8 @@ def from_pages(cls, pages: List[PageLayout]) -> DocumentLayout: def from_file( cls, filename: str, - detection_model: Optional[UnstructuredObjectDetectionModel] = None, - element_extraction_model: Optional[UnstructuredElementExtractionModel] = None, fixed_layouts: Optional[List[Optional[List[TextRegion]]]] = None, - extract_tables: bool = False, pdf_image_dpi: int = 200, - extract_images_in_pdf: bool = False, - image_output_dir_path: Optional[str] = None, **kwargs, ) -> DocumentLayout: """Creates a DocumentLayout from a pdf file.""" @@ -110,13 +105,8 @@ def from_file( image, number=i + 1, document_filename=filename, - detection_model=detection_model, - element_extraction_model=element_extraction_model, layout=layout, fixed_layout=fixed_layout, - extract_tables=extract_tables, - extract_images_in_pdf=extract_images_in_pdf, - image_output_dir_path=image_output_dir_path, **kwargs, ) pages.append(page) @@ -457,12 +447,6 @@ def from_image( def process_data_with_model( data: BinaryIO, model_name: Optional[str], - is_image: bool = False, - fixed_layouts: Optional[List[Optional[List[TextRegion]]]] = None, - extract_tables: bool = False, - pdf_image_dpi: int = 200, - extract_images_in_pdf: bool = False, - image_output_dir_path: Optional[str] = None, **kwargs, ) -> DocumentLayout: """Processes pdf file in the form of a file handler (supporting a read method) into a @@ -473,12 +457,6 @@ def process_data_with_model( layout = process_file_with_model( tmp_file.name, model_name, - is_image=is_image, - fixed_layouts=fixed_layouts, - extract_tables=extract_tables, - pdf_image_dpi=pdf_image_dpi, - extract_images_in_pdf=extract_images_in_pdf, - image_output_dir_path=image_output_dir_path, **kwargs, )