diff --git a/docker/prepare.sh b/docker/prepare.sh index ce367c6..e443d30 100644 --- a/docker/prepare.sh +++ b/docker/prepare.sh @@ -1,7 +1,7 @@ #!/bin/bash function start_docker() { - docker run --gpus=all --shm-size 2g --net=host -itd --name bisheng_unstr_dev1 \ + docker run --net=host -itd --name bisheng_unstr_dev1 \ -v /home/hanfeng:/home/hanfeng -v /home/public:/home/public ubuntu:20.04 bash } diff --git a/src/unstructured/__init__.py b/src/bisheng_unstructured/__init__.py similarity index 100% rename from src/unstructured/__init__.py rename to src/bisheng_unstructured/__init__.py diff --git a/src/unstructured/__version__.py b/src/bisheng_unstructured/__version__.py similarity index 100% rename from src/unstructured/__version__.py rename to src/bisheng_unstructured/__version__.py diff --git a/src/unstructured/cleaners/__init__.py b/src/bisheng_unstructured/cleaners/__init__.py similarity index 100% rename from src/unstructured/cleaners/__init__.py rename to src/bisheng_unstructured/cleaners/__init__.py diff --git a/src/unstructured/cleaners/core.py b/src/bisheng_unstructured/cleaners/core.py similarity index 99% rename from src/unstructured/cleaners/core.py rename to src/bisheng_unstructured/cleaners/core.py index 49c2068..050050f 100644 --- a/src/unstructured/cleaners/core.py +++ b/src/bisheng_unstructured/cleaners/core.py @@ -3,10 +3,10 @@ import sys import unicodedata -from unstructured.file_utils.encoding import ( +from bisheng_unstructured.file_utils.encoding import ( format_encoding_str, ) -from unstructured.nlp.patterns import ( +from bisheng_unstructured.nlp.patterns import ( DOUBLE_PARAGRAPH_PATTERN_RE, E_BULLET_PATTERN, LINE_BREAK_RE, diff --git a/src/unstructured/cleaners/extract.py b/src/bisheng_unstructured/cleaners/extract.py similarity index 98% rename from src/unstructured/cleaners/extract.py rename to src/bisheng_unstructured/cleaners/extract.py index 69c3999..e28c0cf 100644 --- a/src/unstructured/cleaners/extract.py +++ b/src/bisheng_unstructured/cleaners/extract.py @@ -2,7 +2,7 @@ import re from typing import List, Optional -from unstructured.nlp.patterns import ( +from bisheng_unstructured.nlp.patterns import ( EMAIL_ADDRESS_PATTERN, EMAIL_DATETIMETZ_PATTERN, IP_ADDRESS_NAME_PATTERN, diff --git a/src/unstructured/cleaners/translate.py b/src/bisheng_unstructured/cleaners/translate.py similarity index 95% rename from src/unstructured/cleaners/translate.py rename to src/bisheng_unstructured/cleaners/translate.py index 8cb16a1..69907cd 100644 --- a/src/unstructured/cleaners/translate.py +++ b/src/bisheng_unstructured/cleaners/translate.py @@ -4,8 +4,8 @@ import langdetect from transformers import MarianMTModel, MarianTokenizer -from unstructured.nlp.tokenize import sent_tokenize -from unstructured.staging.huggingface import chunk_by_attention_window +from bisheng_unstructured.nlp.tokenize import sent_tokenize +from bisheng_unstructured.staging.huggingface import chunk_by_attention_window def _get_opus_mt_model_name(source_lang: str, target_lang: str): diff --git a/src/unstructured/documents/__init__.py b/src/bisheng_unstructured/documents/__init__.py similarity index 100% rename from src/unstructured/documents/__init__.py rename to src/bisheng_unstructured/documents/__init__.py diff --git a/src/unstructured/documents/base.py b/src/bisheng_unstructured/documents/base.py similarity index 97% rename from src/unstructured/documents/base.py rename to src/bisheng_unstructured/documents/base.py index a2c729b..54994d3 100644 --- a/src/unstructured/documents/base.py +++ 
b/src/bisheng_unstructured/documents/base.py @@ -3,7 +3,7 @@ from abc import ABC from typing import List, Optional -from unstructured.documents.elements import Element, NarrativeText +from bisheng_unstructured.documents.elements import Element, NarrativeText class Document(ABC): diff --git a/src/unstructured/documents/coordinates.py b/src/bisheng_unstructured/documents/coordinates.py similarity index 100% rename from src/unstructured/documents/coordinates.py rename to src/bisheng_unstructured/documents/coordinates.py diff --git a/src/unstructured/documents/elements.py b/src/bisheng_unstructured/documents/elements.py similarity index 99% rename from src/unstructured/documents/elements.py rename to src/bisheng_unstructured/documents/elements.py index c0cb1ee..8827697 100644 --- a/src/unstructured/documents/elements.py +++ b/src/bisheng_unstructured/documents/elements.py @@ -13,7 +13,7 @@ from functools import wraps from typing import Any, Callable, Dict, List, Optional, Tuple, TypedDict, Union, cast -from unstructured.documents.coordinates import ( +from bisheng_unstructured.documents.coordinates import ( TYPE_TO_COORDINATE_SYSTEM_MAP, CoordinateSystem, RelativeCoordinateSystem, diff --git a/src/unstructured/documents/email_elements.py b/src/bisheng_unstructured/documents/email_elements.py similarity index 97% rename from src/unstructured/documents/email_elements.py rename to src/bisheng_unstructured/documents/email_elements.py index ae449a6..0931c26 100644 --- a/src/unstructured/documents/email_elements.py +++ b/src/bisheng_unstructured/documents/email_elements.py @@ -4,7 +4,7 @@ from datetime import datetime from typing import Callable, List, Union -from unstructured.documents.elements import UUID, Element, NoID, Text +from bisheng_unstructured.documents.elements import UUID, Element, NoID, Text class NoDatestamp(ABC): diff --git a/src/unstructured/documents/html.py b/src/bisheng_unstructured/documents/html.py similarity index 98% rename from src/unstructured/documents/html.py rename to src/bisheng_unstructured/documents/html.py index 22ade7e..ae0ce80 100644 --- a/src/unstructured/documents/html.py +++ b/src/bisheng_unstructured/documents/html.py @@ -12,9 +12,11 @@ -from unstructured.cleaners.core import clean_bullets, replace_unicode_quotes -from unstructured.documents.base import Page -from unstructured.documents.elements import ( +from bisheng_unstructured.cleaners.core import ( + clean_bullets, replace_unicode_quotes +) +from bisheng_unstructured.documents.base import Page +from bisheng_unstructured.documents.elements import ( Address, Element, EmailAddress, @@ -26,9 +28,9 @@ Table, ElementMetadata ) -from unstructured.documents.xml import VALID_PARSERS, XMLDocument -from unstructured.logger import logger -from unstructured.partition.text_type import ( +from bisheng_unstructured.documents.xml import VALID_PARSERS, XMLDocument +from bisheng_unstructured.logger import logger +from bisheng_unstructured.partition.text_type import ( is_bulleted_text, is_email_address, is_possible_narrative_text, diff --git a/src/unstructured/documents/html_utils.py b/src/bisheng_unstructured/documents/html_utils.py similarity index 96% rename from src/unstructured/documents/html_utils.py rename to src/bisheng_unstructured/documents/html_utils.py index 3992436..763e111 100644 --- a/src/unstructured/documents/html_utils.py +++ b/src/bisheng_unstructured/documents/html_utils.py @@ -1,4 +1,4 @@ -from unstructured.documents.markdown import transform_html_table_to_md +from bisheng_unstructured.documents.markdown 
import transform_html_table_to_md


def visualize_html(elements, output_file):
diff --git a/src/bisheng_unstructured/documents/layout.py b/src/bisheng_unstructured/documents/layout.py
new file mode 100644
index 0000000..2bd7021
--- /dev/null
+++ b/src/bisheng_unstructured/documents/layout.py
@@ -0,0 +1,145 @@
+from __future__ import annotations
+
+import os
+import tempfile
+from pathlib import PurePath
+from typing import Any, BinaryIO, Collection, List, Optional, Tuple, Union, cast
+
+import numpy as np
+from PIL import Image
+
+from bisheng_unstructured.logger import logger
+
+
+class DocumentLayout:
+    """Class for handling documents that are saved as .pdf files. For .pdf files, a
+    document image analysis (DIA) model detects the layout of the page prior to extracting
+    elements."""
+
+    def __init__(self, pages=None):
+        self._pages = pages
+
+    def __str__(self) -> str:
+        return "\n\n".join([str(page) for page in self.pages])
+
+    @property
+    def pages(self) -> List[PageLayout]:
+        """Gets all elements from pages in sequential order."""
+        return self._pages
+
+    @classmethod
+    def from_pages(cls, pages: List[PageLayout]) -> DocumentLayout:
+        """Generates a new instance of the class from a list of `PageLayout`s."""
+        doc_layout = cls()
+        doc_layout._pages = pages
+        return doc_layout
+
+    @classmethod
+    def from_file(
+        cls,
+        filename: str,
+        detection_model: Optional[Any] = None,
+        element_extraction_model: Optional[Any] = None,
+        fixed_layouts: Optional[List[Optional[List[Any]]]] = None,
+        ocr_strategy: str = "auto",
+        ocr_languages: str = "eng",
+        extract_tables: bool = False,
+        pdf_image_dpi: int = 200,
+    ) -> DocumentLayout:
+        """Creates a DocumentLayout from a pdf file."""
+        logger.info(f"Reading PDF for file: {filename} ...")
+        pages: List[PageLayout] = []
+        return cls.from_pages(pages)
+
+    @classmethod
+    def from_image_file(
+        cls,
+        filename: str,
+        detection_model: Optional[Any] = None,
+        element_extraction_model: Optional[Any] = None,
+        ocr_strategy: str = "auto",
+        ocr_languages: str = "eng",
+        fixed_layout: Optional[List[Any]] = None,
+        extract_tables: bool = False,
+    ) -> DocumentLayout:
+        """Creates a DocumentLayout from an image file."""
+        logger.info(f"Reading image file: {filename} ...")
+        return cls.from_pages([])
+
+
+class PageLayout:
+    """Class for an individual PDF page."""
+
+    def __init__(
+        self,
+        number: int,
+        image: Image.Image,
+        layout: Optional[List[Any]],
+        image_metadata: Optional[dict] = None,
+        image_path: Optional[Union[str, PurePath]] = None,
+        detection_model: Optional[Any] = None,
+        element_extraction_model: Optional[Any] = None,
+        ocr_strategy: str = "auto",
+        ocr_languages: str = "eng",
+        extract_tables: bool = False,
+    ):
+        # Keep the page context on the instance; __str__ and _get_image_array
+        # below read these attributes.
+        self.number = number
+        self.image = image
+        self.image_array: Union[np.ndarray, None] = None
+        self.image_metadata = image_metadata
+        self.image_path = image_path
+        self.layout = layout
+        self.detection_model = detection_model
+        self.element_extraction_model = element_extraction_model
+        self.ocr_strategy = ocr_strategy
+        self.ocr_languages = ocr_languages
+        self.extract_tables = extract_tables
+        self.elements: Collection[Any] = []
+
+    def __str__(self) -> str:
+        return "\n\n".join([str(element) for element in self.elements])
+
+    def get_elements_using_image_extraction(
+        self,
+        inplace=True,
+    ) -> Optional[List[Any]]:
+        """Uses end-to-end text element extraction model to extract the elements on the page."""
+        return []
+
+    def get_elements_with_detection_model(self, inplace=True) -> Optional[List[Any]]:
+        """Uses specified model to detect the elements on the page."""
+        elements = []
+        if inplace:
+            self.elements = elements
+            return None
+        return elements
+
+    def get_elements_from_layout(self, layout: List[Any]) -> List[Any]:
+        """Uses the given Layout to separate the page text into elements, either extracting the
+        text from the discovered layout blocks or from the image using OCR."""
+        return []
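+
+    # The page image is converted lazily: `image_array` starts out as None (set in
+    # __init__ above) and is only materialized from the in-memory PIL image, or
+    # loaded from `image_path`, the first time the pixel data is actually needed.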
+    def _get_image_array(self) -> Union[np.ndarray, None]:
+        """Converts the raw image into a numpy array."""
+        if self.image_array is None:
+            if self.image:
+                self.image_array = np.array(self.image)
+            else:
+                image = Image.open(self.image_path)
+                self.image_array = np.array(image)
+        return self.image_array
+
+    @classmethod
+    def from_image(
+        cls,
+        image: Image.Image,
+        image_path: Optional[Union[str, PurePath]],
+        number: int = 1,
+        detection_model: Optional[Any] = None,
+        element_extraction_model: Optional[Any] = None,
+        layout: Optional[List[Any]] = None,
+        ocr_strategy: str = "auto",
+        ocr_languages: str = "eng",
+        extract_tables: bool = False,
+        fixed_layout: Optional[List[Any]] = None,
+    ):
+        """Creates a PageLayout from an already-loaded PIL Image."""
+        page = cls(
+            number=number,
+            image=image,
+            layout=layout,
+            image_path=image_path,
+            detection_model=detection_model,
+            element_extraction_model=element_extraction_model,
+            ocr_strategy=ocr_strategy,
+            ocr_languages=ocr_languages,
+            extract_tables=extract_tables,
+        )
+        return page
diff --git a/src/unstructured/documents/markdown.py b/src/bisheng_unstructured/documents/markdown.py
similarity index 100%
rename from src/unstructured/documents/markdown.py
rename to src/bisheng_unstructured/documents/markdown.py
diff --git a/src/unstructured/documents/pdf_parser/__init__.py b/src/bisheng_unstructured/documents/pdf_parser/__init__.py
similarity index 100%
rename from src/unstructured/documents/pdf_parser/__init__.py
rename to src/bisheng_unstructured/documents/pdf_parser/__init__.py
diff --git a/src/unstructured/documents/pdf_parser/blob.py b/src/bisheng_unstructured/documents/pdf_parser/blob.py
similarity index 100%
rename from src/unstructured/documents/pdf_parser/blob.py
rename to src/bisheng_unstructured/documents/pdf_parser/blob.py
diff --git a/src/unstructured/documents/pdf_parser/image.py b/src/bisheng_unstructured/documents/pdf_parser/image.py
similarity index 95%
rename from src/unstructured/documents/pdf_parser/image.py
rename to src/bisheng_unstructured/documents/pdf_parser/image.py
index 66f0f5f..32fc168 100644
--- a/src/unstructured/documents/pdf_parser/image.py
+++ b/src/bisheng_unstructured/documents/pdf_parser/image.py
@@ -1,9 +1,9 @@
 from typing import Any, Iterator, List, Mapping, Optional, Union
 import base64
-from unstructured.documents.base import Page
+from bisheng_unstructured.documents.base import Page
-from unstructured.models import (
+from bisheng_unstructured.models import (
     LayoutAgent, TableAgent, OCRAgent, TableDetAgent)
 from .blob import Blob
diff --git a/src/unstructured/documents/pdf_parser/pdf.py b/src/bisheng_unstructured/documents/pdf_parser/pdf.py
similarity index 99%
rename from src/unstructured/documents/pdf_parser/pdf.py
rename to src/bisheng_unstructured/documents/pdf_parser/pdf.py
index ec4f49c..ad566df 100644
--- a/src/unstructured/documents/pdf_parser/pdf.py
+++ b/src/bisheng_unstructured/documents/pdf_parser/pdf.py
@@ -22,11 +22,11 @@
 import pypdfium2
 import fitz
-from unstructured.models import (
+from bisheng_unstructured.models import (
     LayoutAgent, TableAgent, OCRAgent, TableDetAgent)
-from unstructured.documents.base import Document, Page
-from unstructured.documents.markdown import (
+from bisheng_unstructured.documents.base import Document, Page
+from bisheng_unstructured.documents.markdown import (
     transform_html_table_to_md,
     merge_md_tables,
     merge_html_tables,
@@ -34,7 +34,7 @@
     clean_html_table
 )
-from unstructured.documents.elements import (
+from bisheng_unstructured.documents.elements import (
     ListItem,
     NarrativeText,
     Text,
diff --git 
a/src/unstructured/documents/pdf_parser/test_pdf.py b/src/bisheng_unstructured/documents/pdf_parser/test_pdf.py similarity index 98% rename from src/unstructured/documents/pdf_parser/test_pdf.py rename to src/bisheng_unstructured/documents/pdf_parser/test_pdf.py index f950f47..b245b7d 100644 --- a/src/unstructured/documents/pdf_parser/test_pdf.py +++ b/src/bisheng_unstructured/documents/pdf_parser/test_pdf.py @@ -8,8 +8,8 @@ import pypdfium2 import fitz -from unstructured.models import LayoutAgent, TableAgent, OCRAgent -from unstructured.documents.pdf_parser.blob import Blob +from bisheng_unstructured.models import LayoutAgent, TableAgent, OCRAgent +from bisheng_unstructured.documents.pdf_parser.blob import Blob def draw_polygon(image, bbox, text=None, color=(255, 0, 0), thickness=1): diff --git a/src/unstructured/documents/xml.py b/src/bisheng_unstructured/documents/xml.py similarity index 95% rename from src/unstructured/documents/xml.py rename to src/bisheng_unstructured/documents/xml.py index c85ea5e..4014bb9 100644 --- a/src/unstructured/documents/xml.py +++ b/src/bisheng_unstructured/documents/xml.py @@ -2,10 +2,10 @@ from lxml import etree -from unstructured.documents.base import Document, Page -from unstructured.file_utils.encoding import read_txt_file -from unstructured.logger import logger -from unstructured.partition.text import ( +from bisheng_unstructured.documents.base import Document, Page +from bisheng_unstructured.file_utils.encoding import read_txt_file +from bisheng_unstructured.logger import logger +from bisheng_unstructured.partition.text import ( element_from_text, partition_text, ) diff --git a/src/unstructured/file_utils/__init__.py b/src/bisheng_unstructured/file_utils/__init__.py similarity index 100% rename from src/unstructured/file_utils/__init__.py rename to src/bisheng_unstructured/file_utils/__init__.py diff --git a/src/unstructured/file_utils/encoding.py b/src/bisheng_unstructured/file_utils/encoding.py similarity index 98% rename from src/unstructured/file_utils/encoding.py rename to src/bisheng_unstructured/file_utils/encoding.py index 01d57ff..c59bf77 100644 --- a/src/unstructured/file_utils/encoding.py +++ b/src/bisheng_unstructured/file_utils/encoding.py @@ -2,7 +2,7 @@ import chardet -from unstructured.partition.common import convert_to_bytes +from bisheng_unstructured.partition.common import convert_to_bytes ENCODE_REC_THRESHOLD = 0.8 diff --git a/src/unstructured/file_utils/exploration.py b/src/bisheng_unstructured/file_utils/exploration.py similarity index 97% rename from src/unstructured/file_utils/exploration.py rename to src/bisheng_unstructured/file_utils/exploration.py index 55d9719..ede00a4 100644 --- a/src/unstructured/file_utils/exploration.py +++ b/src/bisheng_unstructured/file_utils/exploration.py @@ -5,7 +5,7 @@ import pandas as pd -from unstructured.file_utils.filetype import detect_filetype +from bisheng_unstructured.file_utils.filetype import detect_filetype def get_directory_file_info(directory: str) -> pd.DataFrame: diff --git a/src/unstructured/file_utils/file_conversion.py b/src/bisheng_unstructured/file_utils/file_conversion.py similarity index 91% rename from src/unstructured/file_utils/file_conversion.py rename to src/bisheng_unstructured/file_utils/file_conversion.py index 23b803e..e072a99 100644 --- a/src/unstructured/file_utils/file_conversion.py +++ b/src/bisheng_unstructured/file_utils/file_conversion.py @@ -1,8 +1,8 @@ import tempfile from typing import IO, Optional -from unstructured.partition.common import exactly_one 
-from unstructured.utils import dependency_exists, requires_dependencies +from bisheng_unstructured.partition.common import exactly_one +from bisheng_unstructured.utils import dependency_exists, requires_dependencies if dependency_exists("pypandoc"): import pypandoc diff --git a/src/unstructured/file_utils/filetype.py b/src/bisheng_unstructured/file_utils/filetype.py similarity index 97% rename from src/unstructured/file_utils/filetype.py rename to src/bisheng_unstructured/file_utils/filetype.py index a4dd120..9ecdd24 100644 --- a/src/unstructured/file_utils/filetype.py +++ b/src/bisheng_unstructured/file_utils/filetype.py @@ -7,21 +7,20 @@ import zipfile from enum import Enum from functools import wraps -from typing import IO, TYPE_CHECKING, Callable, List, Optional +from typing import IO, Callable, List, Optional -from unstructured.documents.coordinates import PixelSpace -from unstructured.documents.elements import Element, PageBreak -from unstructured.file_utils.encoding import detect_file_encoding, format_encoding_str -from unstructured.nlp.patterns import LIST_OF_DICTS_PATTERN -from unstructured.partition.common import ( +from bisheng_unstructured.documents.coordinates import PixelSpace +from bisheng_unstructured.documents.elements import Element, PageBreak +from bisheng_unstructured.file_utils.encoding import detect_file_encoding, format_encoding_str +from bisheng_unstructured.nlp.patterns import LIST_OF_DICTS_PATTERN +from bisheng_unstructured.partition.common import ( _add_element_metadata, _remove_element_metadata, exactly_one, normalize_layout_element, ) -if TYPE_CHECKING: - from unstructured_inference.inference.layout import DocumentLayout, PageLayout +from bisheng_unstructured.documents.layout import DocumentLayout, PageLayout try: import magic @@ -30,8 +29,8 @@ except ImportError: # pragma: nocover LIBMAGIC_AVAILABLE = False # pragma: nocover -from unstructured.logger import logger -from unstructured.nlp.patterns import EMAIL_HEAD_RE +from bisheng_unstructured.logger import logger +from bisheng_unstructured.nlp.patterns import EMAIL_HEAD_RE TXT_MIME_TYPES = [ "text/plain", diff --git a/src/unstructured/file_utils/google_filetype.py b/src/bisheng_unstructured/file_utils/google_filetype.py similarity index 100% rename from src/unstructured/file_utils/google_filetype.py rename to src/bisheng_unstructured/file_utils/google_filetype.py diff --git a/src/unstructured/file_utils/metadata.py b/src/bisheng_unstructured/file_utils/metadata.py similarity index 100% rename from src/unstructured/file_utils/metadata.py rename to src/bisheng_unstructured/file_utils/metadata.py diff --git a/src/unstructured/logger.py b/src/bisheng_unstructured/logger.py similarity index 100% rename from src/unstructured/logger.py rename to src/bisheng_unstructured/logger.py diff --git a/src/unstructured/models/__init__.py b/src/bisheng_unstructured/models/__init__.py similarity index 100% rename from src/unstructured/models/__init__.py rename to src/bisheng_unstructured/models/__init__.py diff --git a/src/unstructured/models/layout_agent.py b/src/bisheng_unstructured/models/layout_agent.py similarity index 100% rename from src/unstructured/models/layout_agent.py rename to src/bisheng_unstructured/models/layout_agent.py diff --git a/src/unstructured/models/ocr_agent.py b/src/bisheng_unstructured/models/ocr_agent.py similarity index 100% rename from src/unstructured/models/ocr_agent.py rename to src/bisheng_unstructured/models/ocr_agent.py diff --git a/src/unstructured/models/table_agent.py 
b/src/bisheng_unstructured/models/table_agent.py similarity index 100% rename from src/unstructured/models/table_agent.py rename to src/bisheng_unstructured/models/table_agent.py diff --git a/src/unstructured/ingest/__init__.py b/src/bisheng_unstructured/nlp/__init__.py similarity index 100% rename from src/unstructured/ingest/__init__.py rename to src/bisheng_unstructured/nlp/__init__.py diff --git a/src/unstructured/nlp/english-words.txt b/src/bisheng_unstructured/nlp/english-words.txt similarity index 100% rename from src/unstructured/nlp/english-words.txt rename to src/bisheng_unstructured/nlp/english-words.txt diff --git a/src/unstructured/nlp/english_words.py b/src/bisheng_unstructured/nlp/english_words.py similarity index 100% rename from src/unstructured/nlp/english_words.py rename to src/bisheng_unstructured/nlp/english_words.py diff --git a/src/bisheng_unstructured/nlp/partition.py b/src/bisheng_unstructured/nlp/partition.py new file mode 100644 index 0000000..5621365 --- /dev/null +++ b/src/bisheng_unstructured/nlp/partition.py @@ -0,0 +1,7 @@ +# flake8: noqa +from bisheng_unstructured.partition.pdf import partition_pdf +from bisheng_unstructured.partition.text_type import ( + is_bulleted_text, + is_possible_narrative_text, + is_possible_title, +) diff --git a/src/unstructured/nlp/patterns.py b/src/bisheng_unstructured/nlp/patterns.py similarity index 100% rename from src/unstructured/nlp/patterns.py rename to src/bisheng_unstructured/nlp/patterns.py diff --git a/src/unstructured/nlp/tokenize.py b/src/bisheng_unstructured/nlp/tokenize.py similarity index 100% rename from src/unstructured/nlp/tokenize.py rename to src/bisheng_unstructured/nlp/tokenize.py diff --git a/src/unstructured/ingest/cli/__init__.py b/src/bisheng_unstructured/partition/__init__.py similarity index 100% rename from src/unstructured/ingest/cli/__init__.py rename to src/bisheng_unstructured/partition/__init__.py diff --git a/src/unstructured/partition/api.py b/src/bisheng_unstructured/partition/api.py similarity index 96% rename from src/unstructured/partition/api.py rename to src/bisheng_unstructured/partition/api.py index 1064c06..db81c67 100644 --- a/src/unstructured/partition/api.py +++ b/src/bisheng_unstructured/partition/api.py @@ -7,9 +7,9 @@ import requests -from unstructured.documents.elements import Element -from unstructured.partition.common import exactly_one -from unstructured.staging.base import dict_to_elements, elements_from_json +from bisheng_unstructured.documents.elements import Element +from bisheng_unstructured.partition.common import exactly_one +from bisheng_unstructured.staging.base import dict_to_elements, elements_from_json def partition_via_api( diff --git a/src/unstructured/partition/auto.py b/src/bisheng_unstructured/partition/auto.py similarity index 86% rename from src/unstructured/partition/auto.py rename to src/bisheng_unstructured/partition/auto.py index d15649b..c14e07b 100644 --- a/src/unstructured/partition/auto.py +++ b/src/bisheng_unstructured/partition/auto.py @@ -3,71 +3,71 @@ import requests -from unstructured.documents.elements import DataSourceMetadata -from unstructured.file_utils.filetype import ( +from bisheng_unstructured.documents.elements import DataSourceMetadata +from bisheng_unstructured.file_utils.filetype import ( FILETYPE_TO_MIMETYPE, STR_TO_FILETYPE, FileType, detect_filetype, is_json_processable, ) -from unstructured.logger import logger -from unstructured.partition.common import exactly_one -from unstructured.partition.email import partition_email 
-from unstructured.partition.html import partition_html -from unstructured.partition.json import partition_json -from unstructured.partition.text import partition_text -from unstructured.partition.xml import partition_xml -from unstructured.utils import dependency_exists +from bisheng_unstructured.logger import logger +from bisheng_unstructured.partition.common import exactly_one +from bisheng_unstructured.partition.email import partition_email +from bisheng_unstructured.partition.html import partition_html +from bisheng_unstructured.partition.json import partition_json +from bisheng_unstructured.partition.text import partition_text +from bisheng_unstructured.partition.xml import partition_xml +from bisheng_unstructured.utils import dependency_exists if dependency_exists("pandas"): - from unstructured.partition.csv import partition_csv - from unstructured.partition.tsv import partition_tsv + from bisheng_unstructured.partition.csv import partition_csv + from bisheng_unstructured.partition.tsv import partition_tsv if dependency_exists("docx"): - from unstructured.partition.doc import partition_doc - from unstructured.partition.docx import partition_docx + from bisheng_unstructured.partition.doc import partition_doc + from bisheng_unstructured.partition.docx import partition_docx if dependency_exists("docx") and dependency_exists("pypandoc"): - from unstructured.partition.odt import partition_odt + from bisheng_unstructured.partition.odt import partition_odt if dependency_exists("ebooklib"): - from unstructured.partition.epub import partition_epub + from bisheng_unstructured.partition.epub import partition_epub if dependency_exists("pypandoc"): - from unstructured.partition.org import partition_org - from unstructured.partition.rst import partition_rst - from unstructured.partition.rtf import partition_rtf + from bisheng_unstructured.partition.org import partition_org + from bisheng_unstructured.partition.rst import partition_rst + from bisheng_unstructured.partition.rtf import partition_rtf if dependency_exists("markdown"): - from unstructured.partition.md import partition_md + from bisheng_unstructured.partition.md import partition_md if dependency_exists("msg_parser"): - from unstructured.partition.msg import partition_msg + from bisheng_unstructured.partition.msg import partition_msg pdf_imports = ["pdf2image", "pdfminer", "PIL"] if all(dependency_exists(dep) for dep in pdf_imports): - from unstructured.partition.pdf import partition_pdf + from bisheng_unstructured.partition.pdf import partition_pdf if dependency_exists("unstructured_inference"): - from unstructured.partition.image import partition_image + from bisheng_unstructured.partition.image import partition_image if dependency_exists("pptx"): - from unstructured.partition.ppt import partition_ppt - from unstructured.partition.pptx import partition_pptx + from bisheng_unstructured.partition.ppt import partition_ppt + from bisheng_unstructured.partition.pptx import partition_pptx if dependency_exists("pandas") and dependency_exists("openpyxl"): - from unstructured.partition.xlsx import partition_xlsx + from bisheng_unstructured.partition.xlsx import partition_xlsx def partition( diff --git a/src/unstructured/partition/common.py b/src/bisheng_unstructured/partition/common.py similarity index 96% rename from src/unstructured/partition/common.py rename to src/bisheng_unstructured/partition/common.py index 8975aa4..915da72 100644 --- a/src/unstructured/partition/common.py +++ b/src/bisheng_unstructured/partition/common.py @@ -10,8 +10,8 
@@ import emoji from tabulate import tabulate -from unstructured.documents.coordinates import CoordinateSystem -from unstructured.documents.elements import ( +from bisheng_unstructured.documents.coordinates import CoordinateSystem +from bisheng_unstructured.documents.elements import ( TYPE_TO_TEXT_ELEMENT_MAP, CheckBox, CoordinatesMetadata, @@ -21,15 +21,15 @@ PageBreak, Text, ) -from unstructured.logger import logger -from unstructured.nlp.patterns import ENUMERATED_BULLETS_RE, UNICODE_BULLETS_RE -from unstructured.utils import dependency_exists +from bisheng_unstructured.logger import logger +from bisheng_unstructured.nlp.patterns import ENUMERATED_BULLETS_RE, UNICODE_BULLETS_RE +from bisheng_unstructured.utils import dependency_exists if dependency_exists("docx") and dependency_exists("docx.table"): from docx.table import Table as docxtable if TYPE_CHECKING: - from unstructured_inference.inference.layoutelement import ( + from bisheng_unstructured.inference.inference.layoutelement import ( LayoutElement, LocationlessLayoutElement, ) diff --git a/src/unstructured/partition/csv.py b/src/bisheng_unstructured/partition/csv.py similarity index 91% rename from src/unstructured/partition/csv.py rename to src/bisheng_unstructured/partition/csv.py index 6a7314d..1cd5e95 100644 --- a/src/unstructured/partition/csv.py +++ b/src/bisheng_unstructured/partition/csv.py @@ -4,14 +4,14 @@ import pandas as pd from lxml.html.soupparser import fromstring as soupparser_fromstring -from unstructured.documents.elements import ( +from bisheng_unstructured.documents.elements import ( Element, ElementMetadata, Table, process_metadata, ) -from unstructured.file_utils.filetype import FileType, add_metadata_with_filetype -from unstructured.partition.common import ( +from bisheng_unstructured.file_utils.filetype import FileType, add_metadata_with_filetype +from bisheng_unstructured.partition.common import ( exactly_one, get_last_modified_date, get_last_modified_date_from_file, diff --git a/src/unstructured/partition/doc.py b/src/bisheng_unstructured/partition/doc.py similarity index 90% rename from src/unstructured/partition/doc.py rename to src/bisheng_unstructured/partition/doc.py index ffa8ccc..ba141be 100644 --- a/src/unstructured/partition/doc.py +++ b/src/bisheng_unstructured/partition/doc.py @@ -2,15 +2,15 @@ import tempfile from typing import IO, List, Optional -from unstructured.documents.elements import Element, process_metadata -from unstructured.file_utils.filetype import FileType, add_metadata_with_filetype -from unstructured.partition.common import ( +from bisheng_unstructured.documents.elements import Element, process_metadata +from bisheng_unstructured.file_utils.filetype import FileType, add_metadata_with_filetype +from bisheng_unstructured.partition.common import ( convert_office_doc, exactly_one, get_last_modified_date, get_last_modified_date_from_file, ) -from unstructured.partition.docx import partition_docx +from bisheng_unstructured.partition.docx import partition_docx @process_metadata() diff --git a/src/unstructured/partition/docx.py b/src/bisheng_unstructured/partition/docx.py similarity index 97% rename from src/unstructured/partition/docx.py rename to src/bisheng_unstructured/partition/docx.py index fbe817d..3f9dec4 100644 --- a/src/unstructured/partition/docx.py +++ b/src/bisheng_unstructured/partition/docx.py @@ -9,8 +9,8 @@ from docx.text.paragraph import Paragraph from docx.text.run import Run -from unstructured.cleaners.core import clean_bullets -from unstructured.documents.elements 
import ( +from bisheng_unstructured.cleaners.core import clean_bullets +from bisheng_unstructured.documents.elements import ( Address, Element, ElementMetadata, @@ -25,22 +25,22 @@ Title, process_metadata, ) -from unstructured.file_utils.filetype import FileType, add_metadata_with_filetype -from unstructured.partition.common import ( +from bisheng_unstructured.file_utils.filetype import FileType, add_metadata_with_filetype +from bisheng_unstructured.partition.common import ( convert_ms_office_table_to_text, exactly_one, get_last_modified_date, get_last_modified_date_from_file, spooled_to_bytes_io_if_needed, ) -from unstructured.partition.text_type import ( +from bisheng_unstructured.partition.text_type import ( is_bulleted_text, is_email_address, is_possible_narrative_text, is_possible_title, is_us_city_state_zip, ) -from unstructured.utils import dependency_exists +from bisheng_unstructured.utils import dependency_exists if dependency_exists("pypandoc"): import pypandoc diff --git a/src/unstructured/partition/email.py b/src/bisheng_unstructured/partition/email.py similarity index 95% rename from src/unstructured/partition/email.py rename to src/bisheng_unstructured/partition/email.py index 12a4452..4150bd3 100644 --- a/src/unstructured/partition/email.py +++ b/src/bisheng_unstructured/partition/email.py @@ -8,13 +8,13 @@ from tempfile import NamedTemporaryFile, SpooledTemporaryFile, TemporaryDirectory from typing import IO, Callable, Dict, List, Optional, Tuple, Union -from unstructured.file_utils.encoding import ( +from bisheng_unstructured.file_utils.encoding import ( COMMON_ENCODINGS, format_encoding_str, read_txt_file, validate_encoding, ) -from unstructured.partition.common import ( +from bisheng_unstructured.partition.common import ( convert_to_bytes, exactly_one, ) @@ -24,15 +24,15 @@ else: from typing import Final -from unstructured.cleaners.core import clean_extra_whitespace, replace_mime_encodings -from unstructured.cleaners.extract import ( +from bisheng_unstructured.cleaners.core import clean_extra_whitespace, replace_mime_encodings +from bisheng_unstructured.cleaners.extract import ( extract_datetimetz, extract_email_address, extract_ip_address, extract_ip_address_name, extract_mapi_id, ) -from unstructured.documents.elements import ( +from bisheng_unstructured.documents.elements import ( Element, ElementMetadata, Image, @@ -41,18 +41,18 @@ Title, process_metadata, ) -from unstructured.documents.email_elements import ( +from bisheng_unstructured.documents.email_elements import ( MetaData, ReceivedInfo, Recipient, Sender, Subject, ) -from unstructured.file_utils.filetype import FileType, add_metadata_with_filetype -from unstructured.logger import logger -from unstructured.nlp.patterns import EMAIL_DATETIMETZ_PATTERN_RE -from unstructured.partition.html import partition_html -from unstructured.partition.text import partition_text +from bisheng_unstructured.file_utils.filetype import FileType, add_metadata_with_filetype +from bisheng_unstructured.logger import logger +from bisheng_unstructured.nlp.patterns import EMAIL_DATETIMETZ_PATTERN_RE +from bisheng_unstructured.partition.html import partition_html +from bisheng_unstructured.partition.text import partition_text VALID_CONTENT_SOURCES: Final[List[str]] = ["text/html", "text/plain"] diff --git a/src/unstructured/partition/epub.py b/src/bisheng_unstructured/partition/epub.py similarity index 91% rename from src/unstructured/partition/epub.py rename to src/bisheng_unstructured/partition/epub.py index 9a108bb..12b0298 100644 --- 
a/src/unstructured/partition/epub.py +++ b/src/bisheng_unstructured/partition/epub.py @@ -3,14 +3,14 @@ from ebooklib import epub -from unstructured.documents.elements import Element, process_metadata -from unstructured.file_utils.filetype import FileType, add_metadata_with_filetype -from unstructured.partition.common import ( +from bisheng_unstructured.documents.elements import Element, process_metadata +from bisheng_unstructured.file_utils.filetype import FileType, add_metadata_with_filetype +from bisheng_unstructured.partition.common import ( exactly_one, get_last_modified_date, get_last_modified_date_from_file, ) -from unstructured.partition.html import partition_html +from bisheng_unstructured.partition.html import partition_html @process_metadata() diff --git a/src/unstructured/partition/html.py b/src/bisheng_unstructured/partition/html.py similarity index 91% rename from src/unstructured/partition/html.py rename to src/bisheng_unstructured/partition/html.py index 8996529..049ff0a 100644 --- a/src/unstructured/partition/html.py +++ b/src/bisheng_unstructured/partition/html.py @@ -2,24 +2,24 @@ import requests -from unstructured.documents.elements import Element, process_metadata -from unstructured.documents.html import HTMLDocument -from unstructured.documents.xml import VALID_PARSERS -from unstructured.file_utils.encoding import read_txt_file -from unstructured.file_utils.file_conversion import convert_file_to_html_text -from unstructured.file_utils.filetype import ( +from bisheng_unstructured.documents.elements import Element, process_metadata +from bisheng_unstructured.documents.html import HTMLDocument +from bisheng_unstructured.documents.xml import VALID_PARSERS +from bisheng_unstructured.file_utils.encoding import read_txt_file +from bisheng_unstructured.file_utils.file_conversion import convert_file_to_html_text +from bisheng_unstructured.file_utils.filetype import ( FileType, add_metadata_with_filetype, document_to_element_list, ) -from unstructured.partition.common import ( +from bisheng_unstructured.partition.common import ( exactly_one, get_last_modified_date, get_last_modified_date_from_file, ) if TYPE_CHECKING: - from unstructured_inference.inference.layout import DocumentLayout + from bisheng_unstructured.documents.layout import DocumentLayout @process_metadata() diff --git a/src/unstructured/partition/image.py b/src/bisheng_unstructured/partition/image.py similarity index 91% rename from src/unstructured/partition/image.py rename to src/bisheng_unstructured/partition/image.py index 65bf87a..41b1a1b 100644 --- a/src/unstructured/partition/image.py +++ b/src/bisheng_unstructured/partition/image.py @@ -1,8 +1,8 @@ from typing import List, Optional -from unstructured.documents.elements import Element, process_metadata -from unstructured.partition.common import exactly_one -from unstructured.partition.pdf import partition_pdf_or_image +from bisheng_unstructured.documents.elements import Element, process_metadata +from bisheng_unstructured.partition.common import exactly_one +from bisheng_unstructured.partition.pdf import partition_pdf_or_image @process_metadata() diff --git a/src/unstructured/partition/json.py b/src/bisheng_unstructured/partition/json.py similarity index 90% rename from src/unstructured/partition/json.py rename to src/bisheng_unstructured/partition/json.py index f4771da..fd1f012 100644 --- a/src/unstructured/partition/json.py +++ b/src/bisheng_unstructured/partition/json.py @@ -1,18 +1,18 @@ import json from typing import IO, List, Optional -from 
unstructured.documents.elements import Element, process_metadata -from unstructured.file_utils.filetype import ( +from bisheng_unstructured.documents.elements import Element, process_metadata +from bisheng_unstructured.file_utils.filetype import ( FileType, add_metadata_with_filetype, is_json_processable, ) -from unstructured.partition.common import ( +from bisheng_unstructured.partition.common import ( exactly_one, get_last_modified_date, get_last_modified_date_from_file, ) -from unstructured.staging.base import dict_to_elements +from bisheng_unstructured.staging.base import dict_to_elements @process_metadata() diff --git a/src/unstructured/partition/md.py b/src/bisheng_unstructured/partition/md.py similarity index 89% rename from src/unstructured/partition/md.py rename to src/bisheng_unstructured/partition/md.py index cb19bbb..bf36768 100644 --- a/src/unstructured/partition/md.py +++ b/src/bisheng_unstructured/partition/md.py @@ -3,15 +3,15 @@ import markdown import requests -from unstructured.documents.elements import Element, process_metadata -from unstructured.documents.xml import VALID_PARSERS -from unstructured.file_utils.filetype import FileType, add_metadata_with_filetype -from unstructured.partition.common import ( +from bisheng_unstructured.documents.elements import Element, process_metadata +from bisheng_unstructured.documents.xml import VALID_PARSERS +from bisheng_unstructured.file_utils.filetype import FileType, add_metadata_with_filetype +from bisheng_unstructured.partition.common import ( exactly_one, get_last_modified_date, get_last_modified_date_from_file, ) -from unstructured.partition.html import partition_html +from bisheng_unstructured.partition.html import partition_html def optional_decode(contents: Union[str, bytes]) -> str: diff --git a/src/unstructured/partition/msg.py b/src/bisheng_unstructured/partition/msg.py similarity index 92% rename from src/unstructured/partition/msg.py rename to src/bisheng_unstructured/partition/msg.py index 414cc18..4321b16 100644 --- a/src/unstructured/partition/msg.py +++ b/src/bisheng_unstructured/partition/msg.py @@ -4,12 +4,12 @@ import msg_parser -from unstructured.documents.elements import Element, ElementMetadata, process_metadata -from unstructured.file_utils.filetype import FileType, add_metadata_with_filetype -from unstructured.partition.common import exactly_one -from unstructured.partition.email import convert_to_iso_8601 -from unstructured.partition.html import partition_html -from unstructured.partition.text import partition_text +from bisheng_unstructured.documents.elements import Element, ElementMetadata, process_metadata +from bisheng_unstructured.file_utils.filetype import FileType, add_metadata_with_filetype +from bisheng_unstructured.partition.common import exactly_one +from bisheng_unstructured.partition.email import convert_to_iso_8601 +from bisheng_unstructured.partition.html import partition_html +from bisheng_unstructured.partition.text import partition_text @process_metadata() diff --git a/src/unstructured/partition/odt.py b/src/bisheng_unstructured/partition/odt.py similarity index 80% rename from src/unstructured/partition/odt.py rename to src/bisheng_unstructured/partition/odt.py index c6e1f9e..457e73d 100644 --- a/src/unstructured/partition/odt.py +++ b/src/bisheng_unstructured/partition/odt.py @@ -1,12 +1,12 @@ from typing import IO, List, Optional -from unstructured.documents.elements import Element, process_metadata -from unstructured.file_utils.filetype import FileType, add_metadata_with_filetype 
-from unstructured.partition.common import ( +from bisheng_unstructured.documents.elements import Element, process_metadata +from bisheng_unstructured.file_utils.filetype import FileType, add_metadata_with_filetype +from bisheng_unstructured.partition.common import ( get_last_modified_date, get_last_modified_date_from_file, ) -from unstructured.partition.docx import convert_and_partition_docx +from bisheng_unstructured.partition.docx import convert_and_partition_docx @process_metadata() diff --git a/src/unstructured/partition/org.py b/src/bisheng_unstructured/partition/org.py similarity index 83% rename from src/unstructured/partition/org.py rename to src/bisheng_unstructured/partition/org.py index 9e12d28..ae77a22 100644 --- a/src/unstructured/partition/org.py +++ b/src/bisheng_unstructured/partition/org.py @@ -1,8 +1,8 @@ from typing import IO, List, Optional -from unstructured.documents.elements import Element -from unstructured.file_utils.filetype import FileType, add_metadata_with_filetype -from unstructured.partition.html import convert_and_partition_html +from bisheng_unstructured.documents.elements import Element +from bisheng_unstructured.file_utils.filetype import FileType, add_metadata_with_filetype +from bisheng_unstructured.partition.html import convert_and_partition_html @add_metadata_with_filetype(FileType.ORG) diff --git a/src/unstructured/partition/pdf.py b/src/bisheng_unstructured/partition/pdf.py similarity index 96% rename from src/unstructured/partition/pdf.py rename to src/bisheng_unstructured/partition/pdf.py index 12724d3..3622c0b 100644 --- a/src/unstructured/partition/pdf.py +++ b/src/bisheng_unstructured/partition/pdf.py @@ -10,9 +10,9 @@ from pdfminer.layout import LTContainer, LTImage, LTItem, LTTextBox from pdfminer.utils import open_filename -from unstructured.cleaners.core import clean_extra_whitespace -from unstructured.documents.coordinates import PixelSpace -from unstructured.documents.elements import ( +from bisheng_unstructured.cleaners.core import clean_extra_whitespace +from bisheng_unstructured.documents.coordinates import PixelSpace +from bisheng_unstructured.documents.elements import ( CoordinatesMetadata, Element, ElementMetadata, @@ -21,22 +21,22 @@ Text, process_metadata, ) -from unstructured.file_utils.filetype import ( +from bisheng_unstructured.file_utils.filetype import ( FileType, add_metadata_with_filetype, document_to_element_list, ) -from unstructured.nlp.patterns import PARAGRAPH_PATTERN -from unstructured.partition.common import ( +from bisheng_unstructured.nlp.patterns import PARAGRAPH_PATTERN +from bisheng_unstructured.partition.common import ( convert_to_bytes, exactly_one, get_last_modified_date, get_last_modified_date_from_file, spooled_to_bytes_io_if_needed, ) -from unstructured.partition.strategies import determine_pdf_or_image_strategy -from unstructured.partition.text import element_from_text, partition_text -from unstructured.utils import requires_dependencies +from bisheng_unstructured.partition.strategies import determine_pdf_or_image_strategy +from bisheng_unstructured.partition.text import element_from_text, partition_text +from bisheng_unstructured.utils import requires_dependencies RE_MULTISPACE_INCLUDING_NEWLINES = re.compile(pattern=r"\s+", flags=re.DOTALL) @@ -227,7 +227,7 @@ def _partition_pdf_or_image_local( **kwargs, ) -> List[Element]: """Partition using package installed locally.""" - from unstructured_inference.inference.layout import ( + from bisheng_unstructured.inference.inference.layout import ( 
process_data_with_model, process_file_with_model, ) diff --git a/src/unstructured/partition/ppt.py b/src/bisheng_unstructured/partition/ppt.py similarity index 89% rename from src/unstructured/partition/ppt.py rename to src/bisheng_unstructured/partition/ppt.py index 9d9598e..fd0dbad 100644 --- a/src/unstructured/partition/ppt.py +++ b/src/bisheng_unstructured/partition/ppt.py @@ -2,15 +2,15 @@ import tempfile from typing import IO, List, Optional -from unstructured.documents.elements import Element, process_metadata -from unstructured.file_utils.filetype import FileType, add_metadata_with_filetype -from unstructured.partition.common import ( +from bisheng_unstructured.documents.elements import Element, process_metadata +from bisheng_unstructured.file_utils.filetype import FileType, add_metadata_with_filetype +from bisheng_unstructured.partition.common import ( convert_office_doc, exactly_one, get_last_modified_date, get_last_modified_date_from_file, ) -from unstructured.partition.pptx import partition_pptx +from bisheng_unstructured.partition.pptx import partition_pptx @process_metadata() diff --git a/src/unstructured/partition/pptx.py b/src/bisheng_unstructured/partition/pptx.py similarity index 95% rename from src/unstructured/partition/pptx.py rename to src/bisheng_unstructured/partition/pptx.py index 0c2bd69..d075b0d 100644 --- a/src/unstructured/partition/pptx.py +++ b/src/bisheng_unstructured/partition/pptx.py @@ -3,7 +3,7 @@ import pptx -from unstructured.documents.elements import ( +from bisheng_unstructured.documents.elements import ( Element, ElementMetadata, EmailAddress, @@ -15,15 +15,15 @@ Title, process_metadata, ) -from unstructured.file_utils.filetype import FileType, add_metadata_with_filetype -from unstructured.partition.common import ( +from bisheng_unstructured.file_utils.filetype import FileType, add_metadata_with_filetype +from bisheng_unstructured.partition.common import ( convert_ms_office_table_to_text, exactly_one, get_last_modified_date, get_last_modified_date_from_file, spooled_to_bytes_io_if_needed, ) -from unstructured.partition.text_type import ( +from bisheng_unstructured.partition.text_type import ( is_email_address, is_possible_narrative_text, is_possible_title, diff --git a/src/unstructured/partition/rst.py b/src/bisheng_unstructured/partition/rst.py similarity index 82% rename from src/unstructured/partition/rst.py rename to src/bisheng_unstructured/partition/rst.py index c452426..19cdad1 100644 --- a/src/unstructured/partition/rst.py +++ b/src/bisheng_unstructured/partition/rst.py @@ -1,8 +1,8 @@ from typing import IO, List, Optional -from unstructured.documents.elements import Element, process_metadata -from unstructured.file_utils.filetype import FileType, add_metadata_with_filetype -from unstructured.partition.html import convert_and_partition_html +from bisheng_unstructured.documents.elements import Element, process_metadata +from bisheng_unstructured.file_utils.filetype import FileType, add_metadata_with_filetype +from bisheng_unstructured.partition.html import convert_and_partition_html @process_metadata() diff --git a/src/unstructured/partition/rtf.py b/src/bisheng_unstructured/partition/rtf.py similarity index 82% rename from src/unstructured/partition/rtf.py rename to src/bisheng_unstructured/partition/rtf.py index 1dbe9b5..e68e017 100644 --- a/src/unstructured/partition/rtf.py +++ b/src/bisheng_unstructured/partition/rtf.py @@ -1,8 +1,8 @@ from typing import IO, List, Optional -from unstructured.documents.elements import Element, 
process_metadata -from unstructured.file_utils.filetype import FileType, add_metadata_with_filetype -from unstructured.partition.html import convert_and_partition_html +from bisheng_unstructured.documents.elements import Element, process_metadata +from bisheng_unstructured.file_utils.filetype import FileType, add_metadata_with_filetype +from bisheng_unstructured.partition.html import convert_and_partition_html @process_metadata() diff --git a/src/unstructured/partition/strategies.py b/src/bisheng_unstructured/partition/strategies.py similarity index 97% rename from src/unstructured/partition/strategies.py rename to src/bisheng_unstructured/partition/strategies.py index e66040e..1615e2b 100644 --- a/src/unstructured/partition/strategies.py +++ b/src/bisheng_unstructured/partition/strategies.py @@ -1,8 +1,8 @@ from tempfile import SpooledTemporaryFile from typing import BinaryIO, Dict, List, Optional, Union -from unstructured.logger import logger -from unstructured.utils import dependency_exists +from bisheng_unstructured.logger import logger +from bisheng_unstructured.utils import dependency_exists VALID_STRATEGIES: Dict[str, List[str]] = { "auto": [ diff --git a/src/unstructured/partition/text.py b/src/bisheng_unstructured/partition/text.py similarity index 94% rename from src/unstructured/partition/text.py rename to src/bisheng_unstructured/partition/text.py index b3126d2..83905fd 100644 --- a/src/unstructured/partition/text.py +++ b/src/bisheng_unstructured/partition/text.py @@ -2,12 +2,12 @@ import textwrap from typing import IO, Callable, List, Optional, Tuple -from unstructured.cleaners.core import ( +from bisheng_unstructured.cleaners.core import ( auto_paragraph_grouper, clean_bullets, ) -from unstructured.documents.coordinates import CoordinateSystem -from unstructured.documents.elements import ( +from bisheng_unstructured.documents.coordinates import CoordinateSystem +from bisheng_unstructured.documents.elements import ( Address, Element, ElementMetadata, @@ -18,16 +18,16 @@ Title, process_metadata, ) -from unstructured.file_utils.encoding import read_txt_file -from unstructured.file_utils.filetype import FileType, add_metadata_with_filetype -from unstructured.nlp.patterns import PARAGRAPH_PATTERN -from unstructured.nlp.tokenize import sent_tokenize -from unstructured.partition.common import ( +from bisheng_unstructured.file_utils.encoding import read_txt_file +from bisheng_unstructured.file_utils.filetype import FileType, add_metadata_with_filetype +from bisheng_unstructured.nlp.patterns import PARAGRAPH_PATTERN +from bisheng_unstructured.nlp.tokenize import sent_tokenize +from bisheng_unstructured.partition.common import ( exactly_one, get_last_modified_date, get_last_modified_date_from_file, ) -from unstructured.partition.text_type import ( +from bisheng_unstructured.partition.text_type import ( is_bulleted_text, is_email_address, is_possible_narrative_text, diff --git a/src/unstructured/partition/text_type.py b/src/bisheng_unstructured/partition/text_type.py similarity index 97% rename from src/unstructured/partition/text_type.py rename to src/bisheng_unstructured/partition/text_type.py index d515011..7507af8 100644 --- a/src/unstructured/partition/text_type.py +++ b/src/bisheng_unstructured/partition/text_type.py @@ -9,10 +9,10 @@ else: from typing import Final -from unstructured.cleaners.core import remove_punctuation -from unstructured.logger import trace_logger -from unstructured.nlp.english_words import ENGLISH_WORDS -from unstructured.nlp.patterns import ( +from 
bisheng_unstructured.cleaners.core import remove_punctuation +from bisheng_unstructured.logger import trace_logger +from bisheng_unstructured.nlp.english_words import ENGLISH_WORDS +from bisheng_unstructured.nlp.patterns import ( EMAIL_ADDRESS_PATTERN_RE, ENDS_IN_PUNCT_RE, UNICODE_BULLETS_RE, @@ -20,7 +20,7 @@ US_PHONE_NUMBERS_RE, ZH_PUNC_NOT_IN_TITLE_RE, ) -from unstructured.nlp.tokenize import pos_tag, sent_tokenize, word_tokenize +from bisheng_unstructured.nlp.tokenize import pos_tag, sent_tokenize, word_tokenize POS_VERB_TAGS: Final[List[str]] = ["VB", "VBG", "VBD", "VBN", "VBP", "VBZ"] ENGLISH_WORD_SPLIT_RE = re.compile(r"[\s\-,.!?_\/]+") diff --git a/src/unstructured/partition/tsv.py b/src/bisheng_unstructured/partition/tsv.py similarity index 91% rename from src/unstructured/partition/tsv.py rename to src/bisheng_unstructured/partition/tsv.py index 0fd2a89..b592ec5 100644 --- a/src/unstructured/partition/tsv.py +++ b/src/bisheng_unstructured/partition/tsv.py @@ -4,14 +4,14 @@ import pandas as pd from lxml.html.soupparser import fromstring as soupparser_fromstring -from unstructured.documents.elements import ( +from bisheng_unstructured.documents.elements import ( Element, ElementMetadata, Table, process_metadata, ) -from unstructured.file_utils.filetype import FileType, add_metadata_with_filetype -from unstructured.partition.common import ( +from bisheng_unstructured.file_utils.filetype import FileType, add_metadata_with_filetype +from bisheng_unstructured.partition.common import ( exactly_one, get_last_modified_date, get_last_modified_date_from_file, diff --git a/src/unstructured/partition/xlsx.py b/src/bisheng_unstructured/partition/xlsx.py similarity index 92% rename from src/unstructured/partition/xlsx.py rename to src/bisheng_unstructured/partition/xlsx.py index 2f45382..4b4b8cb 100644 --- a/src/unstructured/partition/xlsx.py +++ b/src/bisheng_unstructured/partition/xlsx.py @@ -4,14 +4,14 @@ import pandas as pd from lxml.html.soupparser import fromstring as soupparser_fromstring -from unstructured.documents.elements import ( +from bisheng_unstructured.documents.elements import ( Element, ElementMetadata, Table, process_metadata, ) -from unstructured.file_utils.filetype import FileType, add_metadata_with_filetype -from unstructured.partition.common import ( +from bisheng_unstructured.file_utils.filetype import FileType, add_metadata_with_filetype +from bisheng_unstructured.partition.common import ( exactly_one, get_last_modified_date, get_last_modified_date_from_file, diff --git a/src/unstructured/partition/xml.py b/src/bisheng_unstructured/partition/xml.py similarity index 91% rename from src/unstructured/partition/xml.py rename to src/bisheng_unstructured/partition/xml.py index dc4a153..1adbe62 100644 --- a/src/unstructured/partition/xml.py +++ b/src/bisheng_unstructured/partition/xml.py @@ -2,16 +2,16 @@ from tempfile import SpooledTemporaryFile from typing import IO, BinaryIO, List, Optional, Union, cast -from unstructured.documents.elements import Element, process_metadata -from unstructured.file_utils.encoding import read_txt_file -from unstructured.file_utils.filetype import FileType, add_metadata_with_filetype -from unstructured.partition.common import ( +from bisheng_unstructured.documents.elements import Element, process_metadata +from bisheng_unstructured.file_utils.encoding import read_txt_file +from bisheng_unstructured.file_utils.filetype import FileType, add_metadata_with_filetype +from bisheng_unstructured.partition.common import ( exactly_one, 
get_last_modified_date, get_last_modified_date_from_file, spooled_to_bytes_io_if_needed, ) -from unstructured.partition.text import partition_text +from bisheng_unstructured.partition.text import partition_text def is_leaf(elem): diff --git a/src/unstructured/ingest/connector/__init__.py b/src/bisheng_unstructured/staging/__init__.py similarity index 100% rename from src/unstructured/ingest/connector/__init__.py rename to src/bisheng_unstructured/staging/__init__.py diff --git a/src/unstructured/staging/argilla.py b/src/bisheng_unstructured/staging/argilla.py similarity index 94% rename from src/unstructured/staging/argilla.py rename to src/bisheng_unstructured/staging/argilla.py index 7e4216a..b25a6c9 100644 --- a/src/unstructured/staging/argilla.py +++ b/src/bisheng_unstructured/staging/argilla.py @@ -7,8 +7,8 @@ TokenClassificationRecord, ) -from unstructured.documents.elements import Text -from unstructured.nlp.tokenize import word_tokenize +from bisheng_unstructured.documents.elements import Text +from bisheng_unstructured.nlp.tokenize import word_tokenize def stage_for_argilla( diff --git a/src/unstructured/staging/base.py b/src/bisheng_unstructured/staging/base.py similarity index 97% rename from src/unstructured/staging/base.py rename to src/bisheng_unstructured/staging/base.py index 0d2d012..f127d68 100644 --- a/src/unstructured/staging/base.py +++ b/src/bisheng_unstructured/staging/base.py @@ -3,15 +3,15 @@ import json from typing import Any, Dict, List, Optional -from unstructured.documents.elements import ( +from bisheng_unstructured.documents.elements import ( TYPE_TO_TEXT_ELEMENT_MAP, CheckBox, Element, ElementMetadata, NoID, ) -from unstructured.partition.common import exactly_one -from unstructured.utils import dependency_exists, requires_dependencies +from bisheng_unstructured.partition.common import exactly_one +from bisheng_unstructured.utils import dependency_exists, requires_dependencies if dependency_exists("pandas"): import pandas as pd diff --git a/src/unstructured/staging/baseplate.py b/src/bisheng_unstructured/staging/baseplate.py similarity index 93% rename from src/unstructured/staging/baseplate.py rename to src/bisheng_unstructured/staging/baseplate.py index e1e0122..1993907 100644 --- a/src/unstructured/staging/baseplate.py +++ b/src/bisheng_unstructured/staging/baseplate.py @@ -1,7 +1,7 @@ from typing import Dict, List, TypedDict -from unstructured.documents.elements import Text -from unstructured.staging.base import flatten_dict +from bisheng_unstructured.documents.elements import Text +from bisheng_unstructured.staging.base import flatten_dict class BaseplateRow(TypedDict): diff --git a/src/unstructured/staging/datasaur.py b/src/bisheng_unstructured/staging/datasaur.py similarity index 96% rename from src/unstructured/staging/datasaur.py rename to src/bisheng_unstructured/staging/datasaur.py index a7f96ae..607fe0e 100644 --- a/src/unstructured/staging/datasaur.py +++ b/src/bisheng_unstructured/staging/datasaur.py @@ -1,6 +1,6 @@ from typing import Any, Dict, List, Optional -from unstructured.documents.elements import Text +from bisheng_unstructured.documents.elements import Text def stage_for_datasaur( diff --git a/src/unstructured/staging/huggingface.py b/src/bisheng_unstructured/staging/huggingface.py similarity index 97% rename from src/unstructured/staging/huggingface.py rename to src/bisheng_unstructured/staging/huggingface.py index 3f67bb9..038b74f 100644 --- a/src/unstructured/staging/huggingface.py +++ 
b/src/bisheng_unstructured/staging/huggingface.py @@ -3,7 +3,7 @@ from transformers import PreTrainedTokenizer -from unstructured.documents.elements import Element, NarrativeText, Text +from bisheng_unstructured.documents.elements import Element, NarrativeText, Text def stage_for_transformers( diff --git a/src/unstructured/staging/label_box.py b/src/bisheng_unstructured/staging/label_box.py similarity index 98% rename from src/unstructured/staging/label_box.py rename to src/bisheng_unstructured/staging/label_box.py index 7e09491..453a0cd 100644 --- a/src/unstructured/staging/label_box.py +++ b/src/bisheng_unstructured/staging/label_box.py @@ -2,7 +2,7 @@ import uuid from typing import Any, Dict, List, Optional, Sequence, Union -from unstructured.documents.elements import UUID, NoID, Text +from bisheng_unstructured.documents.elements import UUID, NoID, Text VALID_ATTACHMENT_TYPES: List[str] = ["IMAGE", "VIDEO", "RAW_TEXT", "TEXT_URL", "HTML"] diff --git a/src/unstructured/staging/label_studio.py b/src/bisheng_unstructured/staging/label_studio.py similarity index 98% rename from src/unstructured/staging/label_studio.py rename to src/bisheng_unstructured/staging/label_studio.py index c94fe43..4dc0dd3 100644 --- a/src/unstructured/staging/label_studio.py +++ b/src/bisheng_unstructured/staging/label_studio.py @@ -2,7 +2,7 @@ from dataclasses import dataclass from typing import Any, Dict, List, Optional, Union -from unstructured.documents.elements import Text +from bisheng_unstructured.documents.elements import Text LABEL_STUDIO_TYPE = List[Dict[str, Dict[str, str]]] diff --git a/src/unstructured/staging/prodigy.py b/src/bisheng_unstructured/staging/prodigy.py similarity index 98% rename from src/unstructured/staging/prodigy.py rename to src/bisheng_unstructured/staging/prodigy.py index e4d5a99..55de695 100644 --- a/src/unstructured/staging/prodigy.py +++ b/src/bisheng_unstructured/staging/prodigy.py @@ -2,7 +2,7 @@ import io from typing import Dict, Generator, Iterable, List, Optional, Union -from unstructured.documents.elements import Text +from bisheng_unstructured.documents.elements import Text PRODIGY_TYPE = List[Dict[str, Union[str, Dict[str, str]]]] diff --git a/src/unstructured/staging/weaviate.py b/src/bisheng_unstructured/staging/weaviate.py similarity index 97% rename from src/unstructured/staging/weaviate.py rename to src/bisheng_unstructured/staging/weaviate.py index 6c11a16..0b2030d 100644 --- a/src/unstructured/staging/weaviate.py +++ b/src/bisheng_unstructured/staging/weaviate.py @@ -1,6 +1,6 @@ from typing import Any, Dict, List, TypedDict -from unstructured.documents.elements import ElementMetadata, Text +from bisheng_unstructured.documents.elements import ElementMetadata, Text class Properties(TypedDict): diff --git a/src/unstructured/utils.py b/src/bisheng_unstructured/utils.py similarity index 100% rename from src/unstructured/utils.py rename to src/bisheng_unstructured/utils.py diff --git a/src/unstructured/ingest/README.md b/src/unstructured/ingest/README.md deleted file mode 100644 index cc2d77e..0000000 --- a/src/unstructured/ingest/README.md +++ /dev/null @@ -1,81 +0,0 @@ -# Batch Processing Documents - -## The unstructured-ingest CLI - -The unstructured library includes a CLI to batch ingest documents from (soon to be -various) sources, storing structured outputs locally on the filesystem. - -For example, the following command processes all the documents in S3 in the -`utic-dev-tech-fixtures` bucket with a prefix of `small-pdf-set/`. 
- - unstructured-ingest \ - s3 \ - --remote-url s3://utic-dev-tech-fixtures/small-pdf-set/ \ - --anonymous \ - --structured-output-dir s3-small-batch-output \ - --num-processes 2 - -Naturally, --num-processes may be adjusted for better instance utilization with multiprocessing. - -Installation note: make sure to install the following extras when installing unstructured, needed for the above command: - - pip install "unstructured[s3,local-inference]" - -See the [Quick Start](https://github.com/Unstructured-IO/unstructured#eight_pointed_black_star-quick-start) which documents how to pip install `detectron2` and other OS dependencies, necessary for the parsing of .PDF files. - -# Developers' Guide - -## Local testing - -When testing from a local checkout rather than a pip-installed version of `unstructured`, -just execute `unstructured/ingest/main.py`, e.g.: - - PYTHONPATH=. ./unstructured/ingest/main.py \ - s3 \ - --remote-url s3://utic-dev-tech-fixtures/small-pdf-set/ \ - --anonymous \ - --structured-output-dir s3-small-batch-output \ - --num-processes 2 - -## Adding Data Connectors - -To add a connector, refer to [unstructured/ingest/connector/github.py](unstructured/ingest/connector/github.py) as an example that implements the three relevant abstract base classes. - -If the connector has an available `fsspec` implementation, then refer to [unstructured/ingest/connector/s3.py](unstructured/ingest/connector/s3.py). - -Then, update [unstructured/ingest/cli](unstructured/ingest/cli) to add a subcommand associated with the connector, and hook it up to the parent group. - -Create at least one folder under [examples/ingest](examples/ingest) with an easily reproducible -script that shows the new connector in action. - -Finally, to ensure the connector remains stable, add a new script test_unstructured_ingest/test-ingest-\.sh similar to [test_unstructured_ingest/test-ingest-s3.sh](test_unstructured_ingest/test-ingest-s3.sh), and append a line invoking the new script in [test_unstructured_ingest/test-ingest.sh](test_unstructured_ingest/test-ingest.sh). - -You'll notice that the unstructured outputs for the new documents are expected -to be checked into CI under test_unstructured_ingest/expected-structured-output/\. So, you'll need to `git add` those json outputs so that `test-ingest.sh` passes in CI. - -The `main.py` flags of --re-download/--no-re-download, --download-dir, --preserve-downloads, --structured-output-dir, and --reprocess are honored by the connector. - -### The checklist: - -In checklist form, the above steps are summarized as (a minimal connector sketch follows this checklist): - -- [ ] Create a new module under [unstructured/ingest/connector/](unstructured/ingest/connector/) implementing the 3 abstract base classes, similar to [unstructured/ingest/connector/github.py](unstructured/ingest/connector/github.py). - - [ ] The subclass of `BaseIngestDoc` overrides `process_file()` if extra processing logic is needed other than what is provided by [auto.partition()](unstructured/partition/auto.py). -- [ ] Update [unstructured/ingest/cli](unstructured/ingest/cli) with support for the new connector. -- [ ] Create a folder under [examples/ingest](examples/ingest) that includes at least one well documented script. -- [ ] Add a script test_unstructured_ingest/test-ingest-\.sh. Its JSON output files should total no more than 100K. -- [ ] Git add the expected outputs under test_unstructured_ingest/expected-structured-output/\ so the above test passes in CI.
-- [ ] Add a line to [test_unstructured_ingest/test-ingest.sh](test_unstructured_ingest/test-ingest.sh) invoking the new test script. -- [ ] If additional python dependencies are needed for the new connector: - - [ ] Add them as an extra to [setup.py](unstructured/setup.py). - - [ ] Update the Makefile, adding a target for `install-ingest-` and adding another `pip-compile` line to the `pip-compile` make target. See [this commit](https://github.com/Unstructured-IO/unstructured/commit/ab542ca3c6274f96b431142262d47d727f309e37) for a reference. - - [ ] The added dependencies should be imported at runtime when the new connector is invoked, rather than as top-level imports. - - [ ] Add the decorator `unstructured.utils.requires_dependencies` on top of each class instance or function that uses those connector-specific dependencies, e.g. for `GitHubConnector` this should look like `@requires_dependencies(dependencies=["github"], extras="github")`. - - [ ] Run `make tidy` and `make check` to ensure linting checks pass. -- [ ] Honors the conventions of `BaseConnectorConfig` defined in [unstructured/ingest/interfaces.py](unstructured/ingest/interfaces.py) which is passed through [the CLI](unstructured/ingest/main.py): - - [ ] If running with an `.output_dir` where structured outputs already exist for a given file, the file content is not re-downloaded from the data source nor is it reprocessed. This is made possible by implementing the call to `MyIngestDoc.has_output()` which is invoked in [MainProcess._filter_docs_with_outputs](ingest-prep-for-many/unstructured/ingest/main.py). - - [ ] Unless `.reprocess` is `True`, in which case documents are always reprocessed. - - [ ] If `.preserve_download` is `True`, documents downloaded to `.download_dir` are not removed after processing. - - [ ] Else if `.preserve_download` is `False`, documents downloaded to `.download_dir` are removed after they are **successfully** processed during the invocation of `MyIngestDoc.cleanup_file()` in [process_document](unstructured/ingest/doc_processor/generalized.py). - - [ ] Does not re-download documents to `.download_dir` if `.re_download` is `False`, enforced in `MyIngestDoc.get_file()`. - - [ ] Prints more details if `--verbose` is set in the ingest CLI, similar to the [unstructured/ingest/connector/github.py](unstructured/ingest/connector/github.py) logging messages.
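A minimal sketch of the three classes the checklist above refers to, modeled on the airtable connector deleted further down in this diff. The `MySource` names and the `initialize()`/`get_ingest_docs()` methods are illustrative assumptions based on that pattern, not a verbatim copy of the interface:

    from dataclasses import dataclass
    from pathlib import Path
    from typing import List

    from unstructured.ingest.interfaces import (
        BaseConnector,
        BaseConnectorConfig,
        BaseIngestDoc,
        ConnectorCleanupMixin,
        IngestDocCleanupMixin,
    )


    @dataclass
    class SimpleMySourceConfig(BaseConnectorConfig):
        # connector-specific settings, e.g. an access token
        access_token: str


    @dataclass
    class MySourceIngestDoc(IngestDocCleanupMixin, BaseIngestDoc):
        config: SimpleMySourceConfig
        remote_path: str

        @property
        def filename(self) -> Path:
            # where the raw document is downloaded to
            return (Path(self.standard_config.download_dir) / self.remote_path).resolve()

        @property
        def _output_filename(self) -> Path:
            # where the structured .json output is written
            return Path(self.standard_config.output_dir) / f"{self.remote_path}.json"

        @BaseIngestDoc.skip_if_file_exists
        def get_file(self):
            # fetch the raw document into self.filename; import any
            # connector-specific dependencies here, at runtime
            self.filename.parent.mkdir(parents=True, exist_ok=True)
            self.filename.write_text("...")


    @dataclass
    class MySourceConnector(ConnectorCleanupMixin, BaseConnector):
        config: SimpleMySourceConfig

        def initialize(self):
            # e.g. verify credentials and that the download directory is writable
            pass

        def get_ingest_docs(self) -> List[MySourceIngestDoc]:
            # enumerate the documents to process
            return []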
diff --git a/src/unstructured/ingest/cli/cli.py b/src/unstructured/ingest/cli/cli.py deleted file mode 100644 index 4fd8da6..0000000 --- a/src/unstructured/ingest/cli/cli.py +++ /dev/null @@ -1,47 +0,0 @@ -import click - -import unstructured.ingest.cli.cmds as cli_cmds - - -@click.group() -def ingest(): - pass - - -# Dynamically update shared options for supported subcommands -subcommands = [ - cli_cmds.box, - cli_cmds.s3, - cli_cmds.gcs, - cli_cmds.dropbox, - cli_cmds.azure, - cli_cmds.fsspec, - cli_cmds.github, - cli_cmds.gitlab, - cli_cmds.reddit, - cli_cmds.slack, - cli_cmds.discord, - cli_cmds.wikipedia, - cli_cmds.gdrive, - cli_cmds.biomed, - cli_cmds.notion, - cli_cmds.onedrive, - cli_cmds.outlook, - cli_cmds.local, - cli_cmds.elasticsearch, - cli_cmds.confluence, - cli_cmds.sharepoint, - cli_cmds.airtable, -] - -for subcommand in subcommands: - ingest.add_command(subcommand()) - - -def get_cmd() -> click.Command: - cmd = ingest - # Add all subcommands - for subcommand in subcommands: - # add_shared_options(cmd) - cmd.add_command(subcommand()) - return cmd diff --git a/src/unstructured/ingest/cli/cmds/__init__.py b/src/unstructured/ingest/cli/cmds/__init__.py deleted file mode 100644 index d8cb14f..0000000 --- a/src/unstructured/ingest/cli/cmds/__init__.py +++ /dev/null @@ -1,47 +0,0 @@ -from .airtable import get_cmd as airtable -from .azure import get_cmd as azure -from .biomed import get_cmd as biomed -from .box import get_cmd as box -from .confluence import get_cmd as confluence -from .discord import get_cmd as discord -from .dropbox import get_cmd as dropbox -from .elasticsearch import get_cmd as elasticsearch -from .fsspec import get_cmd as fsspec -from .gcs import get_cmd as gcs -from .github import get_cmd as github -from .gitlab import get_cmd as gitlab -from .google_drive import get_cmd as gdrive -from .local import get_cmd as local -from .notion import get_cmd as notion -from .onedrive import get_cmd as onedrive -from .outlook import get_cmd as outlook -from .reddit import get_cmd as reddit -from .s3 import get_cmd as s3 -from .sharepoint import get_cmd as sharepoint -from .slack import get_cmd as slack -from .wikipedia import get_cmd as wikipedia - -__all__ = [ - "airtable", - "azure", - "biomed", - "box", - "confluence", - "discord", - "dropbox", - "elasticsearch", - "fsspec", - "gcs", - "gdrive", - "github", - "gitlab", - "local", - "notion", - "onedrive", - "outlook", - "reddit", - "s3", - "sharepoint", - "slack", - "wikipedia", -] diff --git a/src/unstructured/ingest/cli/cmds/airtable.py b/src/unstructured/ingest/cli/cmds/airtable.py deleted file mode 100644 index 0e15a62..0000000 --- a/src/unstructured/ingest/cli/cmds/airtable.py +++ /dev/null @@ -1,76 +0,0 @@ -import logging - -import click - -from unstructured.ingest.cli.common import ( - add_recursive_option, - add_shared_options, - log_options, - map_to_processor_config, - map_to_standard_config, - run_init_checks, -) -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.runner import airtable as airtable_fn - - -@click.command() -@click.option( - "--personal-access-token", - default=None, - help="Personal access token to authenticate into Airtable. Check: \ - https://support.airtable.com/docs/creating-and-using-api-keys-and-access-tokens for more info", -) -@click.option( - "--list-of-paths", - default=None, - help="""A list of paths that specify the locations to ingest data from within Airtable. 
- - If this argument is not set, the connector ingests all tables within each and every base. - --list-of-paths: path1 path2 path3 …. - path: base_id/table_id(optional)/view_id(optional)/ - - To obtain (base, table, view) ids in bulk, check: - https://airtable.com/developers/web/api/list-bases (base ids) - https://airtable.com/developers/web/api/get-base-schema (table and view ids) - https://pyairtable.readthedocs.io/en/latest/metadata.html (base, table and view ids) - - To obtain specific ids from Airtable UI, go to your workspace, and copy any - relevant id from the URL structure: - https://airtable.com/appAbcDeF1ghijKlm/tblABcdEfG1HIJkLm/viwABCDEfg6hijKLM - appAbcDeF1ghijKlm -> base_id - tblABcdEfG1HIJkLm -> table_id - viwABCDEfg6hijKLM -> view_id - - You can also check: https://support.airtable.com/docs/finding-airtable-ids - - Here is an example of one --list-of-paths: - base1/ → gets all tables inside base1 - base1/table1 → gets all rows and columns within table1 in base1 - base1/table1/view1 → gets the rows and columns that are - visible in view1 for the table1 in base1 - - Examples of invalid airtable_paths: - table1 → has to mention base to be valid - base1/view1 → has to mention table to be valid - """, -) -def airtable(**options): - verbose = options.get("verbose", False) - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - log_options(options) - try: - run_init_checks(**options) - connector_config = map_to_standard_config(options) - processor_config = map_to_processor_config(options) - airtable_fn(connector_config=connector_config, processor_config=processor_config, **options) - except Exception as e: - logger.error(e, exc_info=True) - raise click.ClickException(str(e)) from e - - -def get_cmd() -> click.Command: - cmd = airtable - add_shared_options(cmd) - add_recursive_option(cmd) - return cmd diff --git a/src/unstructured/ingest/cli/cmds/azure.py b/src/unstructured/ingest/cli/cmds/azure.py deleted file mode 100644 index e9cf4f6..0000000 --- a/src/unstructured/ingest/cli/cmds/azure.py +++ /dev/null @@ -1,54 +0,0 @@ -import logging - -import click - -from unstructured.ingest.cli.common import ( - add_recursive_option, - add_remote_url_option, - add_shared_options, - log_options, - map_to_processor_config, - map_to_standard_config, - run_init_checks, -) -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.runner import azure as azure_fn - - -@click.command() -@click.option( - "--account-key", - default=None, - help="Azure Blob Storage or DataLake account key (not required if " - "`azure_account_name` is public).", -) -@click.option( - "--account-name", - default=None, - help="Azure Blob Storage or DataLake account name.", -) -@click.option( - "--connection-string", - default=None, - help="Azure Blob Storage or DataLake connection string.", -) -def azure(**options): - verbose = options.get("verbose", False) - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - log_options(options) - try: - run_init_checks(**options) - connector_config = map_to_standard_config(options) - processor_config = map_to_processor_config(options) - azure_fn(connector_config=connector_config, processor_config=processor_config, **options) - except Exception as e: - logger.error(e, exc_info=True) - raise click.ClickException(str(e)) from e - - -def get_cmd() -> click.Command: - cmd = azure - add_recursive_option(cmd) - add_shared_options(cmd) - add_remote_url_option(cmd) - return cmd
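The airtable and azure subcommands above, and every other cmds/*.py module deleted below, repeat essentially one skeleton. A condensed sketch of it, where `my_source` and the runner import are placeholders standing in for the per-connector names:

    import logging

    import click

    from unstructured.ingest.cli.common import (
        add_shared_options,
        log_options,
        map_to_processor_config,
        map_to_standard_config,
        run_init_checks,
    )
    from unstructured.ingest.logger import ingest_log_streaming_init, logger
    from unstructured.ingest.runner import my_source as my_source_fn  # hypothetical runner name


    @click.command()
    def my_source(**options):
        # every subcommand: init logging, log the (masked) options, run the
        # shared sanity checks, map options onto the two shared config
        # objects, then hand off to the per-connector runner
        verbose = options.get("verbose", False)
        ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO)
        log_options(options)
        try:
            run_init_checks(**options)
            connector_config = map_to_standard_config(options)
            processor_config = map_to_processor_config(options)
            my_source_fn(connector_config=connector_config, processor_config=processor_config, **options)
        except Exception as e:
            logger.error(e, exc_info=True)
            raise click.ClickException(str(e)) from e


    def get_cmd() -> click.Command:
        cmd = my_source
        add_shared_options(cmd)
        return cmd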
diff --git a/src/unstructured/ingest/cli/cmds/biomed.py b/src/unstructured/ingest/cli/cmds/biomed.py deleted file mode 100644 index aef05ab..0000000 --- a/src/unstructured/ingest/cli/cmds/biomed.py +++ /dev/null @@ -1,71 +0,0 @@ -import logging - -import click - -from unstructured.ingest.cli.common import ( - add_shared_options, - log_options, - map_to_processor_config, - map_to_standard_config, - run_init_checks, -) -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.runner import biomed as biomed_fn - - -@click.command() -@click.option( - "--api-id", - default=None, - help="ID parameter for OA Web Service API.", -) -@click.option( - "--api-from", - default=None, - help="From parameter for OA Web Service API.", -) -@click.option( - "--api-until", - default=None, - help="Until parameter for OA Web Service API.", -) -@click.option( - "--decay", - default=0.3, - help="Float factor by which to multiply the delay between retries.", -) -@click.option( - "--path", - default=None, - help="PMC Open Access FTP Directory Path.", -) -@click.option( - "--max-request-time", - default=45, - help="(In seconds) Max request time to OA Web Service API.", -) -@click.option( - "--max-retries", - default=1, - help="Max retries for requests to OA Web Service API.", -) -def biomed( - **options, -): - verbose = options.get("verbose", False) - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - log_options(options) - try: - run_init_checks(**options) - connector_config = map_to_standard_config(options) - processor_config = map_to_processor_config(options) - biomed_fn(connector_config=connector_config, processor_config=processor_config, **options) - except Exception as e: - logger.error(e, exc_info=True) - raise click.ClickException(str(e)) from e - - -def get_cmd() -> click.Command: - cmd = biomed - add_shared_options(cmd) - return cmd diff --git a/src/unstructured/ingest/cli/cmds/box.py b/src/unstructured/ingest/cli/cmds/box.py deleted file mode 100644 index 189ee8f..0000000 --- a/src/unstructured/ingest/cli/cmds/box.py +++ /dev/null @@ -1,43 +0,0 @@ -import logging - -import click - -from unstructured.ingest.cli.common import ( - add_recursive_option, - add_remote_url_option, - add_shared_options, - log_options, - map_to_processor_config, - map_to_standard_config, - run_init_checks, -) -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.runner import box as box_fn - - -@click.command() -@click.option( - "--box-app-config", - default=None, - help="Path to Box app credentials as json file.", -) -def box(**options): - verbose = options.get("verbose", False) - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - log_options(options) - try: - run_init_checks(**options) - connector_config = map_to_standard_config(options) - processor_config = map_to_processor_config(options) - box_fn(connector_config=connector_config, processor_config=processor_config, **options) - except Exception as e: - logger.error(e, exc_info=True) - raise click.ClickException(str(e)) from e - - -def get_cmd() -> click.Command: - cmd = box - add_recursive_option(cmd) - add_shared_options(cmd) - add_remote_url_option(cmd) - return cmd diff --git a/src/unstructured/ingest/cli/cmds/confluence.py b/src/unstructured/ingest/cli/cmds/confluence.py deleted file mode 100644 index 95c9f37..0000000 --- a/src/unstructured/ingest/cli/cmds/confluence.py +++ /dev/null @@ -1,78 +0,0 @@ -import logging - -import click - -from unstructured.ingest.cli.common
import ( - add_shared_options, - log_options, - map_to_processor_config, - map_to_standard_config, - run_init_checks, -) -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.runner import confluence as confluence_fn - - -@click.command() -@click.option( - "--api-token", - required=True, - help="API Token to authenticate into Confluence Cloud. \ - Check https://developer.atlassian.com/cloud/confluence/basic-auth-for-rest-apis/ \ - for more info.", -) -@click.option( - "--list-of-spaces", - default=None, - help="A list of confluence space ids to be fetched. From each fetched space, \ - --max-num-of-docs-from-each-space number of docs will be ingested. \ - --list-of-spaces and --max-num-of-spaces cannot be used at the same time", -) -@click.option( - "--max-num-of-docs-from-each-space", - default=100, - help="Target number of documents to ingest from each fetched confluence space. \ - If any space has fewer documents, all the documents from that space will be ingested. \ - Documents are not necessarily ingested in order of creation date.", -) -@click.option( - "--max-num-of-spaces", - default=500, - help="Number of confluence space ids to be fetched. From each fetched space, \ - --max-num-of-docs-from-each-space number of docs will be ingested. \ - --list-of-spaces and --max-num-of-spaces cannot be used at the same time", -) -@click.option( - "--url", - required=True, - help='URL to Confluence Cloud, e.g. "unstructured-ingest-test.atlassian.net"', -) -@click.option( - "--user-email", - required=True, - help="Email to authenticate into Confluence Cloud", -) -def confluence( - **options, -): - verbose = options.get("verbose", False) - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - log_options(options) - try: - run_init_checks(**options) - connector_config = map_to_standard_config(options) - processor_config = map_to_processor_config(options) - confluence_fn( - connector_config=connector_config, - processor_config=processor_config, - **options, - ) - except Exception as e: - logger.error(e, exc_info=True) - raise click.ClickException(str(e)) from e - - -def get_cmd() -> click.Command: - cmd = confluence - add_shared_options(cmd) - return cmd diff --git a/src/unstructured/ingest/cli/cmds/discord.py b/src/unstructured/ingest/cli/cmds/discord.py deleted file mode 100644 index efc8c4f..0000000 --- a/src/unstructured/ingest/cli/cmds/discord.py +++ /dev/null @@ -1,50 +0,0 @@ -import logging - -import click - -from unstructured.ingest.cli.common import ( - add_shared_options, - log_options, - map_to_processor_config, - map_to_standard_config, - run_init_checks, -) -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.runner import discord as discord_fn - - -@click.command() -@click.option( - "--channels", - required=True, - help="A comma separated list of discord channel ids to ingest from.", -) -@click.option( - "--period", - default=None, - help="Number of days to go back in the history of discord channels, must be a number", -) -@click.option( - "--token", - required=True, - help="Bot token used to access Discord API, must have " - "READ_MESSAGE_HISTORY scope for the bot user", -) -def discord(**options): - verbose = options.get("verbose", False) - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - log_options(options) - try: - run_init_checks(**options) - connector_config =
map_to_standard_config(options) - processor_config = map_to_processor_config(options) - discord_fn(connector_config=connector_config, processor_config=processor_config, **options) - except Exception as e: - logger.error(e, exc_info=True) - raise click.ClickException(str(e)) from e - - -def get_cmd() -> click.Command: - cmd = discord - add_shared_options(cmd) - return cmd diff --git a/src/unstructured/ingest/cli/cmds/dropbox.py b/src/unstructured/ingest/cli/cmds/dropbox.py deleted file mode 100644 index 83895c5..0000000 --- a/src/unstructured/ingest/cli/cmds/dropbox.py +++ /dev/null @@ -1,43 +0,0 @@ -import logging - -import click - -from unstructured.ingest.cli.common import ( - add_recursive_option, - add_remote_url_option, - add_shared_options, - log_options, - map_to_processor_config, - map_to_standard_config, - run_init_checks, -) -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.runner import dropbox as dropbox_fn - - -@click.command() -@click.option( - "--token", - required=True, - help="Dropbox access token.", -) -def dropbox(**options): - verbose = options.get("verbose", False) - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - log_options(options) - try: - run_init_checks(**options) - connector_config = map_to_standard_config(options) - processor_config = map_to_processor_config(options) - dropbox_fn(connector_config=connector_config, processor_config=processor_config, **options) - except Exception as e: - logger.error(e, exc_info=True) - raise click.ClickException(str(e)) from e - - -def get_cmd() -> click.Command: - cmd = dropbox - add_shared_options(cmd) - add_remote_url_option(cmd) - add_recursive_option(cmd) - return cmd diff --git a/src/unstructured/ingest/cli/cmds/elasticsearch.py b/src/unstructured/ingest/cli/cmds/elasticsearch.py deleted file mode 100644 index b1a4773..0000000 --- a/src/unstructured/ingest/cli/cmds/elasticsearch.py +++ /dev/null @@ -1,56 +0,0 @@ -import logging - -import click - -from unstructured.ingest.cli.common import ( - add_shared_options, - log_options, - map_to_processor_config, - map_to_standard_config, - run_init_checks, -) -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.runner import elasticsearch as elasticsearch_fn - - -@click.command() -@click.option( - "--index-name", - required=True, - help="Name for the Elasticsearch index to pull data from", -) -@click.option( - "--jq-query", - default=None, - help="JQ query to get and concatenate a subset of the fields from a JSON document. " - "For a group of JSON documents, it assumes that all of the documents have the same schema. " - "Currently only supported for the Elasticsearch connector. " - "Example: --jq-query '{meta, body}'", -) -@click.option( - "--url", - required=True, - help='URL to the Elasticsearch cluster, e.g. 
"http://localhost:9200"', -) -def elasticsearch(**options): - verbose = options.get("verbose", False) - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - log_options(options) - try: - run_init_checks(**options) - connector_config = map_to_standard_config(options) - processor_config = map_to_processor_config(options) - elasticsearch_fn( - connector_config=connector_config, - processor_config=processor_config, - **options, - ) - except Exception as e: - logger.error(e, exc_info=True) - raise click.ClickException(str(e)) from e - - -def get_cmd() -> click.Command: - cmd = elasticsearch - add_shared_options(cmd) - return cmd diff --git a/src/unstructured/ingest/cli/cmds/fsspec.py b/src/unstructured/ingest/cli/cmds/fsspec.py deleted file mode 100644 index 8ab9d7a..0000000 --- a/src/unstructured/ingest/cli/cmds/fsspec.py +++ /dev/null @@ -1,38 +0,0 @@ -import logging - -import click - -from unstructured.ingest.cli.common import ( - add_recursive_option, - add_remote_url_option, - add_shared_options, - log_options, - map_to_processor_config, - map_to_standard_config, - run_init_checks, -) -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.runner import fsspec as fsspec_fn - - -@click.command() -def fsspec(**options): - verbose = options.get("verbose", False) - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - log_options(options) - try: - run_init_checks(**options) - connector_config = map_to_standard_config(options) - processor_config = map_to_processor_config(options) - fsspec_fn(connector_config=connector_config, processor_config=processor_config, **options) - except Exception as e: - logger.error(e, exc_info=True) - raise click.ClickException(str(e)) from e - - -def get_cmd() -> click.Command: - cmd = fsspec - add_recursive_option(cmd) - add_shared_options(cmd) - add_remote_url_option(cmd) - return cmd diff --git a/src/unstructured/ingest/cli/cmds/gcs.py b/src/unstructured/ingest/cli/cmds/gcs.py deleted file mode 100644 index a8b5c1f..0000000 --- a/src/unstructured/ingest/cli/cmds/gcs.py +++ /dev/null @@ -1,44 +0,0 @@ -import logging - -import click - -from unstructured.ingest.cli.common import ( - add_recursive_option, - add_remote_url_option, - add_shared_options, - log_options, - map_to_processor_config, - map_to_standard_config, - run_init_checks, -) -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.runner import gcs as gcs_fn - - -@click.command() -@click.option( - "--token", - default=None, - help="Token used to access Google Cloud. 
GCSFS will attempt to use your default gcloud creds" - " or get creds from the google metadata service or fall back to anonymous access.", -) -def gcs(**options): - verbose = options.get("verbose", False) - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - log_options(options) - try: - run_init_checks(**options) - connector_config = map_to_standard_config(options) - processor_config = map_to_processor_config(options) - gcs_fn(connector_config=connector_config, processor_config=processor_config, **options) - except Exception as e: - logger.error(e, exc_info=True) - raise click.ClickException(str(e)) from e - - -def get_cmd() -> click.Command: - cmd = gcs - add_recursive_option(cmd) - add_shared_options(cmd) - add_remote_url_option(cmd) - return cmd diff --git a/src/unstructured/ingest/cli/cmds/github.py b/src/unstructured/ingest/cli/cmds/github.py deleted file mode 100644 index d99445a..0000000 --- a/src/unstructured/ingest/cli/cmds/github.py +++ /dev/null @@ -1,58 +0,0 @@ -import logging - -import click - -from unstructured.ingest.cli.common import ( - add_shared_options, - log_options, - map_to_processor_config, - map_to_standard_config, - run_init_checks, -) -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.runner import github as github_fn - - -@click.command() -@click.option( - "--git-access-token", - default=None, - help="A GitHub or GitLab access token, see https://docs.github.com/en/authentication" - " or https://docs.gitlab.com/ee/api/rest/index.html#personalprojectgroup-access-tokens", -) -@click.option( - "--git-branch", - default=None, - help="The branch to fetch files from. If not given," - " the default repository branch is used.", -) -@click.option( - "--git-file-glob", - default=None, - help="A comma-separated list of file globs to limit which types of files are accepted," - " e.g. '*.html,*.txt'", -) -@click.option( - "--url", - required=True, - help='URL to GitHub repository, e.g. "https://github.com/Unstructured-IO/unstructured",' - ' or a repository owner/name pair, e.g.
"Unstructured-IO/unstructured"', -) -def github(**options): - verbose = options.get("verbose", False) - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - log_options(options) - try: - run_init_checks(**options) - connector_config = map_to_standard_config(options) - processor_config = map_to_processor_config(options) - github_fn(connector_config=connector_config, processor_config=processor_config, **options) - except Exception as e: - logger.error(e, exc_info=True) - raise click.ClickException(str(e)) from e - - -def get_cmd() -> click.Command: - cmd = github - add_shared_options(cmd) - return cmd diff --git a/src/unstructured/ingest/cli/cmds/gitlab.py b/src/unstructured/ingest/cli/cmds/gitlab.py deleted file mode 100644 index 001a36e..0000000 --- a/src/unstructured/ingest/cli/cmds/gitlab.py +++ /dev/null @@ -1,58 +0,0 @@ -import logging - -import click - -from unstructured.ingest.cli.common import ( - add_shared_options, - log_options, - map_to_processor_config, - map_to_standard_config, - run_init_checks, -) -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.runner import gitlab as gitlab_fn - - -@click.command() -@click.option( - "--git-access-token", - default=None, - help="A GitHub or GitLab access token, see https://docs.github.com/en/authentication " - " or https://docs.gitlab.com/ee/api/rest/index.html#personalprojectgroup-access-tokens", -) -@click.option( - "--git-branch", - default=None, - help="The branch for which to fetch files from. If not given," - " the default repository branch is used.", -) -@click.option( - "--git-file-glob", - default=None, - help="A comma-separated list of file globs to limit which types of files are accepted," - " e.g. '*.html,*.txt'", -) -@click.option( - "--url", - required=True, - help='URL to GitLab repository, e.g. "https://gitlab.com/gitlab-com/content-sites/docsy-gitlab"' - ', or a repository path, e.g. "gitlab-com/content-sites/docsy-gitlab"', -) -def gitlab(**options): - verbose = options.get("verbose", False) - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - log_options(options) - try: - run_init_checks(**options) - connector_config = map_to_standard_config(options) - processor_config = map_to_processor_config(options) - gitlab_fn(connector_config=connector_config, processor_config=processor_config, **options) - except Exception as e: - logger.error(e, exc_info=True) - raise click.ClickException(str(e)) from e - - -def get_cmd() -> click.Command: - cmd = gitlab - add_shared_options(cmd) - return cmd diff --git a/src/unstructured/ingest/cli/cmds/google_drive.py b/src/unstructured/ingest/cli/cmds/google_drive.py deleted file mode 100644 index 4cf239d..0000000 --- a/src/unstructured/ingest/cli/cmds/google_drive.py +++ /dev/null @@ -1,51 +0,0 @@ -import logging - -import click - -from unstructured.ingest.cli.common import ( - add_recursive_option, - add_shared_options, - log_options, - map_to_processor_config, - map_to_standard_config, - run_init_checks, -) -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.runner import gdrive as gdrive_fn - - -@click.command() -@click.option( - "--drive-id", - required=True, - help="Google Drive File or Folder ID.", -) -@click.option( - "--extension", - default=None, - help="Filters the files to be processed based on extension e.g. 
.jpg, .docx, etc.", -) -@click.option( - "--service-account-key", - required=True, - help="Path to the Google Drive service account json file.", -) -def gdrive(**options): - verbose = options.get("verbose", False) - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - log_options(options) - try: - run_init_checks(**options) - connector_config = map_to_standard_config(options) - processor_config = map_to_processor_config(options) - gdrive_fn(connector_config=connector_config, processor_config=processor_config, **options) - except Exception as e: - logger.error(e, exc_info=True) - raise click.ClickException(str(e)) from e - - -def get_cmd() -> click.Command: - cmd = gdrive - add_recursive_option(cmd) - add_shared_options(cmd) - return cmd diff --git a/src/unstructured/ingest/cli/cmds/local.py b/src/unstructured/ingest/cli/cmds/local.py deleted file mode 100644 index 37317d4..0000000 --- a/src/unstructured/ingest/cli/cmds/local.py +++ /dev/null @@ -1,47 +0,0 @@ -import logging - -import click - -from unstructured.ingest.cli.common import ( - add_recursive_option, - add_shared_options, - log_options, - map_to_processor_config, - map_to_standard_config, - run_init_checks, -) -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.runner import local as local_fn - - -@click.command() -@click.option( - "--file-glob", - default=None, - help="A comma-separated list of file globs to limit which types of local files are accepted," - " e.g. '*.html,*.txt'", -) -@click.option( - "--input-path", - required=True, - help="Path to the location in the local file system that will be processed.", -) -def local(**options): - verbose = options.get("verbose", False) - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - log_options(options) - try: - run_init_checks(**options) - connector_config = map_to_standard_config(options) - processor_config = map_to_processor_config(options) - local_fn(connector_config=connector_config, processor_config=processor_config, **options) - except Exception as e: - logger.error(e, exc_info=True) - raise click.ClickException(str(e)) from e - - -def get_cmd() -> click.Command: - cmd = local - add_shared_options(cmd) - add_recursive_option(cmd) - return cmd diff --git a/src/unstructured/ingest/cli/cmds/notion.py b/src/unstructured/ingest/cli/cmds/notion.py deleted file mode 100644 index 32a9843..0000000 --- a/src/unstructured/ingest/cli/cmds/notion.py +++ /dev/null @@ -1,51 +0,0 @@ -import logging - -import click - -from unstructured.ingest.cli.common import ( - add_recursive_option, - add_shared_options, - log_options, - map_to_processor_config, - map_to_standard_config, - run_init_checks, -) -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.runner import notion as notion_fn - - -@click.command() -@click.option( - "--page-ids", - default=None, - help="Comma separated list of Notion page IDs to pull text from", -) -@click.option( - "--database-ids", - default=None, - help="Comma separated list of Notion database IDs to pull text from", -) -@click.option( - "--api-key", - required=True, - help="API key for Notion api", -) -def notion(**options): - verbose = options.get("verbose", False) - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - log_options(options) - try: - run_init_checks(**options) - connector_config = map_to_standard_config(options) - processor_config = map_to_processor_config(options) - 
notion_fn(connector_config=connector_config, processor_config=processor_config, **options) - except Exception as e: - logger.error(e, exc_info=True) - raise click.ClickException(str(e)) from e - - -def get_cmd() -> click.Command: - cmd = notion - add_shared_options(cmd) - add_recursive_option(cmd) - return cmd diff --git a/src/unstructured/ingest/cli/cmds/onedrive.py b/src/unstructured/ingest/cli/cmds/onedrive.py deleted file mode 100644 index d0b0f6c..0000000 --- a/src/unstructured/ingest/cli/cmds/onedrive.py +++ /dev/null @@ -1,67 +0,0 @@ -import logging - -import click - -from unstructured.ingest.cli.common import ( - add_recursive_option, - add_shared_options, - log_options, - map_to_processor_config, - map_to_standard_config, - run_init_checks, -) -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.runner import onedrive as onedrive_fn - - -@click.command() -@click.option( - "--authority-url", - default="https://login.microsoftonline.com", - help="Authentication token provider for Microsoft apps, default is " - "https://login.microsoftonline.com", -) -@click.option( - "--client-id", - required=True, - help="Microsoft app client ID", -) -@click.option( - "--client-cred", - required=True, - help="Microsoft App client secret", -) -@click.option( - "--path", - default=None, - help="Folder to start parsing files from.", -) -@click.option( - "--tenant", - default="common", - help="ID or domain name associated with your Azure AD instance", -) -@click.option( - "--user-pname", - required=True, - help="User principal name, usually your Azure AD email.", -) -def onedrive(**options): - verbose = options.get("verbose", False) - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - log_options(options) - try: - run_init_checks(**options) - connector_config = map_to_standard_config(options) - processor_config = map_to_processor_config(options) - onedrive_fn(connector_config=connector_config, processor_config=processor_config, **options) - except Exception as e: - logger.error(e, exc_info=True) - raise click.ClickException(str(e)) from e - - -def get_cmd() -> click.Command: - cmd = onedrive - add_recursive_option(cmd) - add_shared_options(cmd) - return cmd diff --git a/src/unstructured/ingest/cli/cmds/outlook.py b/src/unstructured/ingest/cli/cmds/outlook.py deleted file mode 100644 index d478a8a..0000000 --- a/src/unstructured/ingest/cli/cmds/outlook.py +++ /dev/null @@ -1,68 +0,0 @@ -import logging - -import click - -from unstructured.ingest.cli.common import ( - add_recursive_option, - add_shared_options, - log_options, - map_to_processor_config, - map_to_standard_config, - run_init_checks, -) -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.runner import outlook as outlook_fn - - -@click.command() -@click.option( - "--authority-url", - default="https://login.microsoftonline.com", - help="Authentication token provider for Microsoft apps, default is " - "https://login.microsoftonline.com", -) -@click.option( - "--client-id", - required=True, - help="Microsoft app client ID", -) -@click.option( - "--client-cred", - default=None, - help="Microsoft App client secret", -) -@click.option( - "--outlook-folders", - default=None, - help="Comma separated list of folders to download email messages from. " - "Do not specify subfolders.
Use quotes if there are spaces in folder names.", -) -@click.option( - "--tenant", - default="common", - help="ID or domain name associated with your Azure AD instance", -) -@click.option( - "--user-email", - required=True, - help="Outlook email to download messages from.", -) -def outlook(**options): - verbose = options.get("verbose", False) - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - log_options(options) - try: - run_init_checks(**options) - connector_config = map_to_standard_config(options) - processor_config = map_to_processor_config(options) - outlook_fn(connector_config=connector_config, processor_config=processor_config, **options) - except Exception as e: - logger.error(e, exc_info=True) - raise click.ClickException(str(e)) from e - - -def get_cmd() -> click.Command: - cmd = outlook - add_recursive_option(cmd) - add_shared_options(cmd) - return cmd diff --git a/src/unstructured/ingest/cli/cmds/reddit.py b/src/unstructured/ingest/cli/cmds/reddit.py deleted file mode 100644 index 76e2028..0000000 --- a/src/unstructured/ingest/cli/cmds/reddit.py +++ /dev/null @@ -1,67 +0,0 @@ -import logging - -import click - -from unstructured.ingest.cli.common import ( - add_shared_options, - log_options, - map_to_processor_config, - map_to_standard_config, - run_init_checks, -) -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.runner import reddit as reddit_fn - - -@click.command() -@click.option( - "--client-id", - required=True, - help="The client ID, see " - "https://praw.readthedocs.io/en/stable/getting_started/quick_start.html#prerequisites" - " for more information.", -) -@click.option( - "--client-secret", - required=True, - help="The client secret, see " - "https://praw.readthedocs.io/en/stable/getting_started/quick_start.html#prerequisites" - " for more information.", -) -@click.option("--num-posts", default=10, help="The number of posts to fetch.") -@click.option( - "--search-query", - default=None, - help="If set, return posts using this query. Otherwise, use hot posts.", -) -@click.option( - "--subreddit-name", - required=True, - help='The name of a subreddit, without the "r/", e.g.
"machinelearning"', -) -@click.option( - "--user-agent", - required=True, - default="Unstructured Ingest Subreddit fetcher", - help="The user agent to use on the Reddit API, see " - "https://praw.readthedocs.io/en/stable/getting_started/quick_start.html#prerequisites" - " for more information.", -) -def reddit(**options): - verbose = options.get("verbose", False) - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - log_options(options) - try: - run_init_checks(**options) - connector_config = map_to_standard_config(options) - processor_config = map_to_processor_config(options) - reddit_fn(connector_config=connector_config, processor_config=processor_config, **options) - except Exception as e: - logger.error(e, exc_info=True) - raise click.ClickException(str(e)) from e - - -def get_cmd() -> click.Command: - cmd = reddit - add_shared_options(cmd) - return cmd diff --git a/src/unstructured/ingest/cli/cmds/s3.py b/src/unstructured/ingest/cli/cmds/s3.py deleted file mode 100644 index aad1bf5..0000000 --- a/src/unstructured/ingest/cli/cmds/s3.py +++ /dev/null @@ -1,44 +0,0 @@ -import logging - -import click - -from unstructured.ingest.cli.common import ( - add_recursive_option, - add_remote_url_option, - add_shared_options, - log_options, - map_to_processor_config, - map_to_standard_config, - run_init_checks, -) -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.runner import s3 as s3_fn - - -@click.command() -@click.option( - "--anonymous", - is_flag=True, - default=False, - help="Connect to s3 without local AWS credentials.", -) -def s3(**options): - verbose = options.get("verbose", False) - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - log_options(options) - try: - run_init_checks(**options) - connector_config = map_to_standard_config(options) - processor_config = map_to_processor_config(options) - s3_fn(connector_config=connector_config, processor_config=processor_config, **options) - except Exception as e: - logger.error(e, exc_info=True) - raise click.ClickException(str(e)) from e - - -def get_cmd() -> click.Command: - cmd = s3 - add_recursive_option(cmd) - add_shared_options(cmd) - add_remote_url_option(cmd) - return cmd diff --git a/src/unstructured/ingest/cli/cmds/sharepoint.py b/src/unstructured/ingest/cli/cmds/sharepoint.py deleted file mode 100644 index e7fd3c2..0000000 --- a/src/unstructured/ingest/cli/cmds/sharepoint.py +++ /dev/null @@ -1,72 +0,0 @@ -import logging - -import click - -from unstructured.ingest.cli.common import ( - add_recursive_option, - add_shared_options, - log_options, - map_to_processor_config, - map_to_standard_config, - run_init_checks, -) -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.runner import sharepoint as sharepoint_fn - - -@click.command() -@click.option( - "--client-id", - default=None, - help="Sharepoint app client ID", -) -@click.option( - "--client-cred", - default=None, - help="Sharepoint app secret", -) -@click.option( - "--site", - default=None, - help="Sharepoint site url. Process either base url e.g https://[tenant].sharepoint.com \ - or relative sites https://[tenant].sharepoint.com/sites/.\ - To process all sites within the tenant pass a site url as\ - https://[tenant]-admin.sharepoint.com.\ - This requires the app to be registered at a tenant level", -) -@click.option( - "--path", - default="Shared Documents", - help="Path from which to start parsing files. 
If the connector is to process all sites \ - within the tenant this filter will be applied to all sites' document libraries. \ - Default: 'Shared Documents'", -) -@click.option( - "--files-only", - is_flag=True, - default=False, - help="Process only files.", -) -def sharepoint(**options): - verbose = options.get("verbose", False) - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - log_options(options) - try: - run_init_checks(**options) - connector_config = map_to_standard_config(options) - processor_config = map_to_processor_config(options) - sharepoint_fn( - connector_config=connector_config, - processor_config=processor_config, - **options, - ) - except Exception as e: - logger.error(e, exc_info=True) - raise click.ClickException(str(e)) from e - - -def get_cmd() -> click.Command: - cmd = sharepoint - add_recursive_option(cmd) - add_shared_options(cmd) - return cmd diff --git a/src/unstructured/ingest/cli/cmds/slack.py b/src/unstructured/ingest/cli/cmds/slack.py deleted file mode 100644 index aa8e191..0000000 --- a/src/unstructured/ingest/cli/cmds/slack.py +++ /dev/null @@ -1,57 +0,0 @@ -import logging - -import click - -from unstructured.ingest.cli.common import ( - add_shared_options, - log_options, - map_to_processor_config, - map_to_standard_config, - run_init_checks, -) -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.runner import slack as slack_fn - - -@click.command() -@click.option( - "--channels", - required=True, - help="Comma separated list of Slack channel IDs to pull messages from, " - "can be a public or private channel", -) -@click.option( - "--start-date", - default=None, - help="Start date/time in formats YYYY-MM-DD or YYYY-MM-DDTHH:MM:SS or " - "YYYY-MM-DD+HH:MM:SS or YYYY-MM-DDTHH:MM:SStz", -) -@click.option( - "--end-date", - default=None, - help="End date/time in formats YYYY-MM-DD or YYYY-MM-DDTHH:MM:SS or " - "YYYY-MM-DD+HH:MM:SS or YYYY-MM-DDTHH:MM:SStz", -) -@click.option( - "--token", - required=True, - help="Bot token used to access Slack API, must have channels:history " "scope for the bot user", -) -def slack(**options): - verbose = options.get("verbose", False) - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - log_options(options) - try: - run_init_checks(**options) - connector_config = map_to_standard_config(options) - processor_config = map_to_processor_config(options) - slack_fn(connector_config=connector_config, processor_config=processor_config, **options) - except Exception as e: - logger.error(e, exc_info=True) - raise click.ClickException(str(e)) from e - - -def get_cmd() -> click.Command: - cmd = slack - add_shared_options(cmd) - return cmd diff --git a/src/unstructured/ingest/cli/cmds/wikipedia.py b/src/unstructured/ingest/cli/cmds/wikipedia.py deleted file mode 100644 index 61a1cf0..0000000 --- a/src/unstructured/ingest/cli/cmds/wikipedia.py +++ /dev/null @@ -1,49 +0,0 @@ -import logging - -import click - -from unstructured.ingest.cli.common import ( - add_shared_options, - log_options, - map_to_processor_config, - map_to_standard_config, - run_init_checks, -) -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.runner import wikipedia as wikipedia_fn - - -@click.command() -@click.option( - "--auto-suggest", - default=True, - help="Whether to automatically suggest a page if the exact page was not found."
- " Set to False if the wrong Wikipedia page is fetched.", -) -@click.option( - "--page-title", - required=True, - help='Title of a Wikipedia page, e.g. "Open source software".', -) -def wikipedia(**options): - verbose = options.get("verbose", False) - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - log_options(options) - try: - run_init_checks(**options) - connector_config = map_to_standard_config(options) - processor_config = map_to_processor_config(options) - wikipedia_fn( - connector_config=connector_config, - processor_config=processor_config, - **options, - ) - except Exception as e: - logger.error(e, exc_info=True) - raise click.ClickException(str(e)) from e - - -def get_cmd() -> click.Command: - cmd = wikipedia - add_shared_options(cmd) - return cmd diff --git a/src/unstructured/ingest/cli/common.py b/src/unstructured/ingest/cli/common.py deleted file mode 100644 index 046ad44..0000000 --- a/src/unstructured/ingest/cli/common.py +++ /dev/null @@ -1,278 +0,0 @@ -import logging -from typing import Optional - -from click import ClickException, Command, Option - -from unstructured.ingest.interfaces import ( - ProcessorConfigs, - StandardConnectorConfig, -) -from unstructured.ingest.logger import ingest_log_streaming_init, logger - - -def run_init_checks( - verbose: bool, - local_input_path: Optional[str], - download_dir: Optional[str], - metadata_exclude: Optional[str], - metadata_include: Optional[str], - flatten_metadata: bool, - fields_include: str, - partition_by_api: bool, - partition_endpoint: Optional[str], - preserve_downloads: bool, - download_only: bool, - **kwargs, -): - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - # Initial breaking checks - if local_input_path is not None and download_dir: - raise ClickException( - "Files should already be in local file system: there is nothing to download, " - "but --download-dir is specified.", - ) - if metadata_exclude is not None and metadata_include is not None: - raise ClickException( - "Arguments `--metadata-include` and `--metadata-exclude` are " - "mutually exclusive with each other.", - ) - - # Warnings - if flatten_metadata and "metadata" not in fields_include: - logger.warning( - "`--flatten-metadata` is specified, but there is no metadata to flatten, " - "since `--metadata` is not specified in `--fields-include`.", - ) - if "metadata" not in fields_include and (metadata_include or metadata_exclude): - logger.warning( - "Either '--metadata-include` or `--metadata-exclude` is specified" - " while metadata is not specified in fields-include.", - ) - - if ( - not partition_by_api - and partition_endpoint != "https://api.unstructured.io/general/v0/general" - ): - logger.warning( - "Ignoring --partition-endpoint because --partition-by-api was not set", - ) - if (not preserve_downloads and not download_only) and download_dir: - logger.warning( - "Not preserving downloaded files but download_dir is specified", - ) - - -def log_options(options: dict): - ingest_log_streaming_init(logging.DEBUG if options["verbose"] else logging.INFO) - sensitive_fields = [ - "account_name", - "account_key", - "api_key", - "token", - "client_id", - "client_cred", - ] - options_to_log = options.copy() - options_to_log.update( - { - k: "*******" - for k, v in options_to_log.items() - if k in sensitive_fields and v is not None - }, - ) - logger.debug(f"options: {options_to_log}") - - -def map_to_standard_config(options: dict) -> StandardConnectorConfig: - return StandardConnectorConfig( - 
download_dir=options["download_dir"], - output_dir=options["structured_output_dir"], - download_only=options["download_only"], - fields_include=options["fields_include"], - flatten_metadata=options["flatten_metadata"], - metadata_exclude=options["metadata_exclude"], - metadata_include=options["metadata_include"], - partition_by_api=options["partition_by_api"], - partition_endpoint=options["partition_endpoint"], - preserve_downloads=options["preserve_downloads"], - re_download=options["re_download"], - api_key=options["api_key"], - ) - - -def map_to_processor_config(options: dict) -> ProcessorConfigs: - return ProcessorConfigs( - partition_strategy=options["partition_strategy"], - partition_ocr_languages=options["partition_ocr_languages"], - partition_pdf_infer_table_structure=options["partition_pdf_infer_table_structure"], - partition_encoding=options["partition_encoding"], - num_processes=options["num_processes"], - reprocess=options["reprocess"], - max_docs=options["max_docs"], - ) - - -def add_remote_url_option(cmd: Command): - cmd.params.append( - Option( - ["--remote-url"], - required=True, - help="Remote fsspec URL formatted as `protocol://dir/path`, it can be either " - "a directory or a single file.", - ), - ) - - -def add_recursive_option(cmd: Command): - cmd.params.append( - Option( - ["--recursive"], - is_flag=True, - default=False, - help="Recursively download files in their respective folders, " - "otherwise stop at the files in the provided folder level.", - ), - ) - - -def add_shared_options(cmd: Command): - options = [ - Option( - ["--max-docs"], - default=None, - type=int, - help="If specified, process at most specified number of documents.", - ), - Option( - ["--flatten-metadata"], - is_flag=True, - default=False, - help="Results in flattened json elements. " - "Specifically, the metadata key values are brought to the top-level of the element, " - "and the `metadata` key itself is removed.", - ), - Option( - ["--fields-include"], - default="element_id,text,type,metadata", - help="If set, include the specified top-level fields in an element. " - "Default is `element_id,text,type,metadata`.", - ), - Option( - ["--metadata-include"], - default=None, - help="If set, include the specified metadata fields if they exist " - "and drop all other fields. " - "Usage: provide a single string with comma separated values. " - "Example: --metadata-include filename,page_number ", - ), - Option( - ["--metadata-exclude"], - default=None, - help="If set, drop the specified metadata fields if they exist. " - "Usage: provide a single string with comma separated values. " - "Example: --metadata-exclude filename,page_number ", - ), - Option( - ["--partition-by-api"], - is_flag=True, - default=False, - help="Use a remote API to partition the files." - " Otherwise, use the function from partition.auto", - ), - Option( - ["--partition-endpoint"], - default="https://api.unstructured.io/general/v0/general", - help="If partitioning via api, use the following host. " - "Default: https://api.unstructured.io/general/v0/general", - ), - Option( - ["--partition-strategy"], - default="auto", - help="The method that will be used to process the documents. " - "Default: auto. Other strategies include `fast` and `hi_res`.", - ), - Option( - ["--partition-ocr-languages"], - default="eng", - help="A list of language packs to specify which languages to use for OCR, " - "separated by '+' e.g. 'eng+deu' to use the English and German language packs.
" - "The appropriate Tesseract " - "language pack needs to be installed." - "Default: eng", - ), - Option( - ["--partition-pdf-infer-table-structure"], - default=False, - help="If set to True, partition will includ the table's text content in the response." - "Default: False", - ), - Option( - ["--partition-encoding"], - default=None, - help="Text encoding to use when reading documents. By default the encoding is " - "detected automatically.", - ), - Option( - ["--api-key"], - default="", - help="API Key for partition endpoint.", - ), - Option( - ["--local-input-path"], - default=None, - help="Path to the location in the local file system that will be processed.", - ), - Option( - ["--local-file-glob"], - default=None, - help="A comma-separated list of file globs to limit which " - "types of local files are accepted," - " e.g. '*.html,*.txt'", - ), - Option( - ["--download-dir"], - help="Where files are downloaded to, defaults to " - "`$HOME/.cache/unstructured/ingest/`.", - ), - Option( - ["--preserve-downloads"], - is_flag=True, - default=False, - help="Preserve downloaded files. Otherwise each file is removed after being processed " - "successfully.", - ), - Option( - ["--download-only"], - is_flag=True, - default=False, - help="Download any files that are not already present in either --download-dir or " - "the default download ~/.cache/... location in case --download-dir " - "is not specified and " - "skip processing them through unstructured.", - ), - Option( - ["--re-download/--no-re-download"], - default=False, - help="Re-download files even if they are already present in --download-dir.", - ), - Option( - ["--structured-output-dir"], - default="structured-output", - help="Where to place structured output .json files.", - ), - Option( - ["--reprocess"], - is_flag=True, - default=False, - help="Reprocess a downloaded file even if the relevant structured output .json file " - "in --structured-output-dir already exists.", - ), - Option( - ["--num-processes"], - default=2, - show_default=True, - help="Number of parallel processes to process docs in.", - ), - Option(["-v", "--verbose"], is_flag=True, default=False), - ] - cmd.params.extend(options) diff --git a/src/unstructured/ingest/connector/airtable.py b/src/unstructured/ingest/connector/airtable.py deleted file mode 100644 index 92b09c8..0000000 --- a/src/unstructured/ingest/connector/airtable.py +++ /dev/null @@ -1,222 +0,0 @@ -import os -from dataclasses import dataclass -from pathlib import Path -from typing import Optional - -from unstructured.ingest.interfaces import ( - BaseConnector, - BaseConnectorConfig, - BaseIngestDoc, - ConnectorCleanupMixin, - IngestDocCleanupMixin, - StandardConnectorConfig, -) -from unstructured.ingest.logger import logger -from unstructured.utils import requires_dependencies - - -@dataclass -class SimpleAirtableConfig(BaseConnectorConfig): - """Connector config where: - auth_token is the authentication token to authenticate into Airtable. - - Check https://support.airtable.com/docs/airtable-api-key-deprecation-notice - for more info on authentication. 
- """ - - personal_access_token: str - list_of_paths: Optional[str] - - -@dataclass -class AirtableFileMeta: - """Metadata specifying a table id, a base id which the table is stored in, - and an optional view id in case particular rows and fields are to be ingested""" - - base_id: str - table_id: str - view_id: Optional[str] = None - - -@dataclass -class AirtableIngestDoc(IngestDocCleanupMixin, BaseIngestDoc): - """Class encapsulating fetching a doc and writing processed results (but not - doing the processing). - - Current implementation creates an Airtable connection object - to fetch each document, rather than creating a it for each thread. - """ - - config: SimpleAirtableConfig - file_meta: AirtableFileMeta - - @property - def filename(self): - return ( - Path(self.standard_config.download_dir) - / self.file_meta.base_id - / f"{self.file_meta.table_id}.csv" - ).resolve() - - @property - def _output_filename(self): - """Create output file path based on output directory, base id, and table id""" - output_file = f"{self.file_meta.table_id}.json" - return Path(self.standard_config.output_dir) / self.file_meta.base_id / output_file - - @requires_dependencies(["pyairtable", "pandas"]) - @BaseIngestDoc.skip_if_file_exists - def get_file(self): - logger.debug(f"Fetching {self} - PID: {os.getpid()}") - - # TODO: instead of having a separate connection object for each doc, - # have a separate connection object for each process - import pandas as pd - from pyairtable import Api - - self.api = Api(self.config.personal_access_token) - table = self.api.table(self.file_meta.base_id, self.file_meta.table_id) - - df = pd.DataFrame.from_dict( - [row["fields"] for row in table.all(view=self.file_meta.view_id)], - ).sort_index(axis=1) - - self.document = df.to_csv() - self.filename.parent.mkdir(parents=True, exist_ok=True) - - with open(self.filename, "w", encoding="utf8") as f: - f.write(self.document) - - -airtable_id_prefixes = ["app", "tbl", "viw"] - - -def raise_airtable_path_error(piece): - if any(piece[:3] == prefix for prefix in airtable_id_prefixes): - raise ( - ValueError( - "Path components are not correctly ordered.\ - Valid path structures: \ - - base_id/table_id/view_id , \ - - base_id/table_id, \ - - base_id .\ - It is also possible to leave --airtable-list-of-paths \ - argument empty (this will ingest everything).", - ) - ) - else: - raise ( - ValueError( - """Path components are not valid Airtable ids. - base_id should look like: appAbcDeF1ghijKlm, - table_id should look like: tblAbcDeF1ghijKlm, - view_id should look like: viwAbcDeF1ghijKlm""", - ) - ) - - -def check_path_validity(path): - pieces = path.split("/") - assert ( - 1 <= len(pieces) <= 3 - ), "Path should be composed of between 1-3 \ - components (base_id, table_id, view_id)." 
- - for i, piece in enumerate(pieces): - try: - assert piece[:3] == airtable_id_prefixes[i] - except AssertionError: - raise_airtable_path_error(piece) - - -@dataclass -class AirtableConnector(ConnectorCleanupMixin, BaseConnector): - """Fetches tables or views from an Airtable org.""" - - config: SimpleAirtableConfig - - def __init__( - self, - standard_config: StandardConnectorConfig, - config: SimpleAirtableConfig, - ): - super().__init__(standard_config, config) - - @requires_dependencies(["pyairtable"]) - def initialize(self): - from pyairtable import Api - - self.base_ids_to_fetch_tables_from = [] - if self.config.list_of_paths: - self.list_of_paths = self.config.list_of_paths.split() - - self.api = Api(self.config.personal_access_token) - - @requires_dependencies(["pyairtable"]) - def use_all_bases(self): - from pyairtable.metadata import get_api_bases - - self.base_ids_to_fetch_tables_from = [ - base["id"] for base in get_api_bases(self.api)["bases"] - ] - - @requires_dependencies(["pyairtable"]) - def fetch_table_ids(self): - from pyairtable.metadata import get_base_schema - - bases = [ - (base_id, self.api.base(base_id)) for base_id in self.base_ids_to_fetch_tables_from - ] - - metadata_for_each_base = [ - (base_id, get_base_schema(base)["tables"]) for base_id, base in bases - ] - - baseid_tableid_viewid_tuples = [ - (base_id, table["id"], None) - for base_id, base_metadata in metadata_for_each_base - for table in base_metadata - ] - - return baseid_tableid_viewid_tuples - - def get_ingest_docs(self): - """Fetches documents in an Airtable org.""" - - # When no list of paths provided, the connector ingests everything. - if not self.config.list_of_paths: - self.use_all_bases() - baseid_tableid_viewid_tuples = self.fetch_table_ids() - - # When there is a list of paths, the connector checks the validity - # of the paths, and fetches table_ids to be ingested, based on the paths. 
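- # e.g. "appA/tblB/viwC" -> (appA, tblB, viwC), "appA/tblB" -> (appA, tblB, None), - # and a bare "appA" defers to fetch_table_ids() for every table in that base.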
- else: - self.paths = self.config.list_of_paths.split() - self.paths = [path.strip("/") for path in self.paths] - - [check_path_validity(path) for path in self.paths] - - self.base_ids_to_fetch_tables_from = [] - baseid_tableid_viewid_tuples = [] - - for path in self.paths: - components = path.split("/") - if len(components) == 1: # only a base_id is provided - self.base_ids_to_fetch_tables_from.append(components[0]) - elif len(components) == 2: # a base_id and a table_id are provided - baseid_tableid_viewid_tuples.append((components[0], components[1], None)) - elif len(components) == 3: # a base_id, table_id, and a view_id are provided - baseid_tableid_viewid_tuples.append( - (components[0], components[1], components[2]), - ) - - baseid_tableid_viewid_tuples += self.fetch_table_ids() - - return [ - AirtableIngestDoc( - self.standard_config, - self.config, - AirtableFileMeta(base_id, table_id, view_id), - ) - for base_id, table_id, view_id in baseid_tableid_viewid_tuples - ] diff --git a/src/unstructured/ingest/connector/azure.py b/src/unstructured/ingest/connector/azure.py deleted file mode 100644 index df059c9..0000000 --- a/src/unstructured/ingest/connector/azure.py +++ /dev/null @@ -1,33 +0,0 @@ -from dataclasses import dataclass -from typing import Type - -from unstructured.ingest.connector.fsspec import ( - FsspecConnector, - FsspecIngestDoc, - SimpleFsspecConfig, -) -from unstructured.ingest.interfaces import StandardConnectorConfig -from unstructured.utils import requires_dependencies - - -@dataclass -class SimpleAzureBlobStorageConfig(SimpleFsspecConfig): - pass - - -class AzureBlobStorageIngestDoc(FsspecIngestDoc): - @requires_dependencies(["adlfs", "fsspec"], extras="azure") - def get_file(self): - super().get_file() - - -@requires_dependencies(["adlfs", "fsspec"], extras="azure") -class AzureBlobStorageConnector(FsspecConnector): - ingest_doc_cls: Type[AzureBlobStorageIngestDoc] = AzureBlobStorageIngestDoc - - def __init__( - self, - standard_config: StandardConnectorConfig, - config: SimpleAzureBlobStorageConfig, - ) -> None: - super().__init__(standard_config=standard_config, config=config) diff --git a/src/unstructured/ingest/connector/biomed.py b/src/unstructured/ingest/connector/biomed.py deleted file mode 100644 index c30212a..0000000 --- a/src/unstructured/ingest/connector/biomed.py +++ /dev/null @@ -1,292 +0,0 @@ -import os -import urllib.request -from dataclasses import dataclass -from ftplib import FTP, error_perm -from pathlib import Path -from typing import List, Optional, Union - -import requests -from bs4 import BeautifulSoup -from requests.adapters import HTTPAdapter -from urllib3.util import Retry - -from unstructured.ingest.interfaces import ( - BaseConnector, - BaseConnectorConfig, - BaseIngestDoc, - ConnectorCleanupMixin, - IngestDocCleanupMixin, - StandardConnectorConfig, -) -from unstructured.ingest.logger import logger -from unstructured.utils import ( - validate_date_args, -) - -DOMAIN = "ftp.ncbi.nlm.nih.gov" -FTP_DOMAIN = f"ftp://{DOMAIN}" -PMC_DIR = "pub/pmc" -PDF_DIR = "oa_pdf" - - -@dataclass -class BiomedFileMeta: - ftp_path: str - download_filepath: Union[str, os.PathLike] - output_filepath: Union[str, os.PathLike] - - -@dataclass -class SimpleBiomedConfig(BaseConnectorConfig): - """Connector config where path is the FTP directory path and - id_, from_, until, format are API parameters.""" - - path: Optional[str] - # OA Web Service API Options - id_: Optional[str] - from_: Optional[str] - until: Optional[str] - max_retries: int = 5 - 
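# request_timeout is in seconds; decay is the backoff factor handed to urllib3's Retry. -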
request_timeout: int = 45 - decay: float = 0.3 - - def validate_api_inputs(self): - valid = False - - if self.from_: - valid = validate_date_args(self.from_) - - if self.until: - valid = validate_date_args(self.until) - - return valid - - def __post_init__(self): - self.is_file = False - self.is_dir = False - self.is_api = False - - if not self.path: - is_valid = self.validate_api_inputs() - if not is_valid: - raise ValueError( - "Path argument or at least one of the " - "OA Web Service arguments MUST be provided.", - ) - - self.is_api = True - else: - self.path = self.path.strip("/") - is_valid = self.path.lower().startswith(PDF_DIR) - - if not is_valid: - raise ValueError(f"Path MUST start with {PDF_DIR}") - - ftp = FTP(DOMAIN) - ftp.login() - - path = Path(PMC_DIR) / self.path - response = "" - try: - if path.suffix == ".pdf": - response = ftp.cwd(str(path.parent)) - self.is_file = True - else: - response = ftp.cwd(str(path)) - except error_perm as exc: - if "no such file or directory" in exc.args[0].lower(): - raise ValueError(f"The path: {path} is not valid.") - elif "not a directory" in exc.args[0].lower(): - self.is_file = True - elif "command successful" in response: - self.is_dir = True - else: - raise ValueError( - f"Something went wrong when validating the path: {path}.", - ) - - -@dataclass -class BiomedIngestDoc(IngestDocCleanupMixin, BaseIngestDoc): - config: SimpleBiomedConfig - file_meta: BiomedFileMeta - - @property - def filename(self): - return Path(self.file_meta.download_filepath).resolve() # type: ignore - - @property - def _output_filename(self): - return Path(f"{self.file_meta.output_filepath}.json").resolve() - - def cleanup_file(self): - if ( - not self.standard_config.preserve_downloads - and self.filename.is_file() - and not self.standard_config.download_only - ): - logger.debug(f"Cleaning up {self}") - Path.unlink(self.filename) - - @BaseIngestDoc.skip_if_file_exists - def get_file(self): - download_path = self.file_meta.download_filepath # type: ignore - dir_ = Path(os.path.dirname(download_path)) # type: ignore - if not dir_.is_dir(): - logger.debug(f"Creating directory: {dir_}") - - if dir_: - dir_.mkdir(parents=True, exist_ok=True) - urllib.request.urlretrieve( - self.file_meta.ftp_path, # type: ignore - self.file_meta.download_filepath, - ) - logger.debug(f"File downloaded: {self.file_meta.download_filepath}") - - -class BiomedConnector(ConnectorCleanupMixin, BaseConnector): - """Objects of this class support fetching documents from the Biomedical literature FTP directory""" - - config: SimpleBiomedConfig - - def __init__( - self, - standard_config: StandardConnectorConfig, - config: SimpleBiomedConfig, - ): - super().__init__(standard_config, config) - - def _list_objects_api(self): - def urls_to_metadata(urls): - files = [] - for url in urls: - parts = url.split(PDF_DIR) - if len(parts) > 1: - local_path = parts[1].strip("/") - files.append( - BiomedFileMeta( - ftp_path=url, - download_filepath=( - Path(self.standard_config.download_dir) / local_path - ).resolve(), - output_filepath=( - Path(self.standard_config.output_dir) / local_path - ).resolve(), - ), - ) - - return files - - files: List[BiomedFileMeta] = [] - - endpoint_url = "https://www.ncbi.nlm.nih.gov/pmc/utils/oa/oa.fcgi?format=pdf" - - if self.config.id_: - endpoint_url += f"&id={self.config.id_}" - - if self.config.from_: - endpoint_url += f"&from={self.config.from_}" - - if self.config.until: - endpoint_url += f"&until={self.config.until}" - - while endpoint_url: - session = requests.Session()
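- # Retry transient failures with exponential backoff before giving up. -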
retries = Retry( - total=self.config.max_retries, - backoff_factor=self.config.decay, - ) - adapter = HTTPAdapter(max_retries=retries) - session.mount("http://", adapter) - session.mount("https://", adapter) - response = session.get(endpoint_url, timeout=self.config.request_timeout) - soup = BeautifulSoup(response.content, features="lxml") - urls = [link["href"] for link in soup.find_all("link")] - - if not urls: - return files - - endpoint_url = urls[-1] if "resumptiontoken" in urls[-1].lower() else None - if endpoint_url: - urls = urls[:-1] - - files.extend(urls_to_metadata(urls)) - - return files - - def _list_objects(self): - files = [] - - # Conform to mypy, null check performed elsewhere. - # Wouldn't be in this method unless self.config.path exists - path: str = self.config.path if self.config.path else "" - - def traverse(path, download_dir, output_dir): - full_path = Path(PMC_DIR) / path - logger.debug(f"Traversing directory: {full_path}") - - ftp = FTP(DOMAIN) - ftp.login() - - try: - response = ftp.cwd(str(full_path)) - except error_perm: - raise ValueError(f"{full_path} is not a valid directory.") - - if "command successful" in response.lower(): - sub_paths = [path / p for p in ftp.nlst()] - - if not sub_paths: - return - - ext = Path(sub_paths[0]).suffix - if ext: - for sub_path in sub_paths: - ftp_path = f"{FTP_DOMAIN}/{PMC_DIR}/{sub_path}" - local_path = "/".join(str(sub_path).split("/")[1:]) - files.append( - BiomedFileMeta( - ftp_path=ftp_path, - download_filepath=( - Path(self.standard_config.download_dir) / local_path - ).resolve(), - output_filepath=( - Path(self.standard_config.output_dir) / local_path - ).resolve(), - ), - ) - - else: - for sub_path in sub_paths: - traverse(sub_path, download_dir, output_dir) - - else: - raise ValueError(f"{full_path} is not a valid directory.") - - ftp_path = f"{FTP_DOMAIN}/{PMC_DIR}/{self.config.path}" - if self.config.is_file: - local_path = "/".join(path.split("/")[1:]) - return [ - BiomedFileMeta( - ftp_path=ftp_path, - download_filepath=( - Path(self.standard_config.download_dir) / local_path - ).resolve(), - output_filepath=(Path(self.standard_config.output_dir) / local_path).resolve(), - ), - ] - else: - traverse( - Path(path), - Path(self.standard_config.download_dir), - Path(self.standard_config.output_dir), - ) - - return files - - def initialize(self): - pass - - def get_ingest_docs(self): - files = self._list_objects_api() if self.config.is_api else self._list_objects() - return [BiomedIngestDoc(self.standard_config, self.config, file) for file in files] diff --git a/src/unstructured/ingest/connector/box.py b/src/unstructured/ingest/connector/box.py deleted file mode 100644 index 3eae7b8..0000000 --- a/src/unstructured/ingest/connector/box.py +++ /dev/null @@ -1,74 +0,0 @@ -""" -Box Connector -Box does not make it simple to download files with an App. -First of all, this does not work with a free Box account. 
-Make sure the App service email is a collaborator for your folder (co-owner or editor) -Make sure you have the 'write all files' application scope -Maybe check 'Make API calls using the as-user header' -REAUTHORIZE app after making any of the above changes -""" - -from dataclasses import dataclass -from typing import Type - -from unstructured.ingest.connector.fsspec import ( - FsspecConnector, - FsspecIngestDoc, - SimpleFsspecConfig, -) -from unstructured.ingest.interfaces import StandardConnectorConfig -from unstructured.utils import requires_dependencies - - -class AccessTokenError(Exception): - """There is a problem with the Access Token.""" - - -@dataclass -class SimpleBoxConfig(SimpleFsspecConfig): - @requires_dependencies(["boxfs"], extras="box") - def __post_init__(self): - from boxsdk import JWTAuth - - super().__post_init__() - # We are passing in a JSON file path via the environment variable. - # Need to convert that to an OAuth2 object. - try: - self.access_kwargs["oauth"] = JWTAuth.from_settings_file( - self.access_kwargs["box_app_config"], - ) - except (TypeError, ValueError, KeyError) as e: - raise AccessTokenError(f"Problem with box_app_config: {e}") - - def __getstate__(self): - """ - NOTE: This should not be a permanent solution. - Multiprocessing fails when it tries to pickle some Locks in the SimpleBoxConfig. - __getstate__ is called right before an object gets pickled. - We are setting those attributes to None to allow pickling. - """ - state = self.__dict__.copy() - state["access_kwargs"]["oauth"]._refresh_lock = None - state["access_kwargs"]["oauth"]._rsa_private_key._blinding_lock = None - state["access_kwargs"]["oauth"]._rsa_private_key._backend = None - state["access_kwargs"]["oauth"]._rsa_private_key._rsa_cdata = None - state["access_kwargs"]["oauth"]._rsa_private_key._evp_pkey = None - return state - - -class BoxIngestDoc(FsspecIngestDoc): - @requires_dependencies(["boxfs", "fsspec"], extras="box") - def get_file(self): - super().get_file() - - -@requires_dependencies(["boxfs", "fsspec"], extras="box") -class BoxConnector(FsspecConnector): - ingest_doc_cls: Type[BoxIngestDoc] = BoxIngestDoc - - def __init__( - self, - config: SimpleBoxConfig, - standard_config: StandardConnectorConfig, - ) -> None: - super().__init__(standard_config, config) diff --git a/src/unstructured/ingest/connector/confluence.py b/src/unstructured/ingest/connector/confluence.py deleted file mode 100644 index 5771aef..0000000 --- a/src/unstructured/ingest/connector/confluence.py +++ /dev/null @@ -1,208 +0,0 @@ -import math -import os -from dataclasses import dataclass -from pathlib import Path -from typing import Optional - -from atlassian import Confluence - -from unstructured.ingest.interfaces import ( - BaseConnector, - BaseConnectorConfig, - BaseIngestDoc, - ConnectorCleanupMixin, - IngestDocCleanupMixin, - StandardConnectorConfig, -) -from unstructured.ingest.logger import logger -from unstructured.utils import requires_dependencies - - -@dataclass -class SimpleConfluenceConfig(BaseConnectorConfig): - """Connector config where: - user_email is the email to authenticate into Confluence Cloud, - api_token is the api token to authenticate into Confluence Cloud, - and url is the URL pointing to the Confluence Cloud instance. - - Check https://developer.atlassian.com/cloud/confluence/basic-auth-for-rest-apis/ - for more info on the api_token.
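- - An illustrative instantiation (hypothetical values): - SimpleConfluenceConfig( - user_email="user@example.com", - api_token="<api-token>", - url="https://example.atlassian.net", - list_of_spaces=None, - max_number_of_spaces=10, - max_number_of_docs_from_each_space=100, - )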
- """ - - user_email: str - api_token: str - url: str - list_of_spaces: Optional[str] - max_number_of_spaces: int - max_number_of_docs_from_each_space: int - - -@dataclass -class ConfluenceFileMeta: - """Metadata specifying: - id for the confluence space that the document locates in, - and the id of document that is being reached to. - """ - - space_id: str - document_id: str - - -def scroll_wrapper(func): - def wrapper(*args, **kwargs): - """Wraps a function to obtain scroll functionality.""" - number_of_items_to_fetch = kwargs["number_of_items_to_fetch"] - del kwargs["number_of_items_to_fetch"] - - kwargs["limit"] = min(100, number_of_items_to_fetch) - kwargs["start"] = 0 if "start" not in kwargs else kwargs["start"] - - all_results = [] - num_iterations = math.ceil(number_of_items_to_fetch / kwargs["limit"]) - - for _ in range(num_iterations): - response = func(*args, **kwargs) - if type(response) is list: - all_results += func(*args, **kwargs) - elif type(response) is dict: - all_results += func(*args, **kwargs)["results"] - - kwargs["start"] += kwargs["limit"] - - return all_results[:number_of_items_to_fetch] - - return wrapper - - -@dataclass -class ConfluenceIngestDoc(IngestDocCleanupMixin, BaseIngestDoc): - """Class encapsulating fetching a doc and writing processed results (but not - doing the processing). - - Current implementation creates a Confluence connection object - to fetch each doc, rather than creating a it for each thread. - """ - - config: SimpleConfluenceConfig - file_meta: ConfluenceFileMeta - - # TODO: remove one of filename or _tmp_download_file, using a wrapper - @property - def filename(self): - return ( - Path(self.standard_config.download_dir) - / self.file_meta.space_id - / f"{self.file_meta.document_id}.html" - ).resolve() - - @property - def _output_filename(self): - """Create output file path based on output directory, space id and document id.""" - output_file = f"{self.file_meta.document_id}.json" - return Path(self.standard_config.output_dir) / self.file_meta.space_id / output_file - - @requires_dependencies(["atlassian"]) - @BaseIngestDoc.skip_if_file_exists - def get_file(self): - logger.debug(f"Fetching {self} - PID: {os.getpid()}") - - # TODO: instead of having a separate connection object for each doc, - # have a separate connection object for each process - confluence = Confluence( - self.config.url, - username=self.config.user_email, - password=self.config.api_token, - ) - - result = confluence.get_page_by_id(page_id=self.file_meta.document_id, expand="body.view") - self.document = result["body"]["view"]["value"] - self.filename.parent.mkdir(parents=True, exist_ok=True) - with open(self.filename, "w", encoding="utf8") as f: - f.write(self.document) - - -@requires_dependencies(["atlassian"]) -@dataclass -class ConfluenceConnector(ConnectorCleanupMixin, BaseConnector): - """Fetches body fields from all documents within all spaces in a Confluence Cloud instance.""" - - config: SimpleConfluenceConfig - - def __init__( - self, - standard_config: StandardConnectorConfig, - config: SimpleConfluenceConfig, - ): - super().__init__(standard_config, config) - - @requires_dependencies(["atlassian"]) - def initialize(self): - self.confluence = Confluence( - url=self.config.url, - username=self.config.user_email, - password=self.config.api_token, - ) - - self.list_of_spaces = None - if self.config.list_of_spaces: - self.list_of_spaces = self.config.list_of_spaces.split(",") - if self.config.max_number_of_spaces: - logger.warning( - 
"""--confluence-list-of-spaces and --confluence-num-of-spaces cannot - be used at the same time. Connector will only fetch the - --confluence-list-of-spaces that you've provided.""", - ) - - @requires_dependencies(["atlassian"]) - def _get_space_ids(self): - """Fetches spaces in a confluence domain.""" - - get_spaces_with_scroll = scroll_wrapper(self.confluence.get_all_spaces) - - all_results = get_spaces_with_scroll( - number_of_items_to_fetch=self.config.max_number_of_spaces, - ) - - space_ids = [space["key"] for space in all_results] - return space_ids - - @requires_dependencies(["atlassian"]) - def _get_docs_ids_within_one_space( - self, - space_id: str, - content_type: str = "page", - ): - get_pages_with_scroll = scroll_wrapper(self.confluence.get_all_pages_from_space) - results = get_pages_with_scroll( - space=space_id, - number_of_items_to_fetch=self.config.max_number_of_docs_from_each_space, - content_type=content_type, - ) - - doc_ids = [(space_id, doc["id"]) for doc in results] - return doc_ids - - @requires_dependencies(["atlassian"]) - def _get_doc_ids_within_spaces(self): - space_ids = self._get_space_ids() if not self.list_of_spaces else self.list_of_spaces - - doc_ids_all = [self._get_docs_ids_within_one_space(space_id=id) for id in space_ids] - - doc_ids_flattened = [ - (space_id, doc_id) - for doc_ids_space in doc_ids_all - for space_id, doc_id in doc_ids_space - ] - return doc_ids_flattened - - def get_ingest_docs(self): - """Fetches all documents in a confluence space.""" - doc_ids = self._get_doc_ids_within_spaces() - return [ - ConfluenceIngestDoc( - self.standard_config, - self.config, - ConfluenceFileMeta(space_id, doc_id), - ) - for space_id, doc_id in doc_ids - ] diff --git a/src/unstructured/ingest/connector/discord.py b/src/unstructured/ingest/connector/discord.py deleted file mode 100644 index 636fdf4..0000000 --- a/src/unstructured/ingest/connector/discord.py +++ /dev/null @@ -1,146 +0,0 @@ -import datetime as dt -import os -from dataclasses import dataclass -from pathlib import Path -from typing import List, Optional - -from unstructured.ingest.interfaces import ( - BaseConnector, - BaseConnectorConfig, - BaseIngestDoc, - ConnectorCleanupMixin, - IngestDocCleanupMixin, - StandardConnectorConfig, -) -from unstructured.ingest.logger import logger -from unstructured.utils import ( - requires_dependencies, -) - - -@dataclass -class SimpleDiscordConfig(BaseConnectorConfig): - """Connector config where channels is a comma separated list of - Discord channels to pull messages from. - """ - - # Discord Specific Options - channels: List[str] - token: str - days: Optional[int] - verbose: bool = False - - def __post_init__(self): - if self.days: - try: - self.days = int(self.days) - except ValueError: - raise ValueError("--discord-period must be an integer") - - pass - - @staticmethod - def parse_channels(channel_str: str) -> List[str]: - """Parses a comma separated list of channels into a list.""" - return [x.strip() for x in channel_str.split(",")] - - -@dataclass -class DiscordIngestDoc(IngestDocCleanupMixin, BaseIngestDoc): - """Class encapsulating fetching a doc and writing processed results (but not - doing the processing!). - Also includes a cleanup method. When things go wrong and the cleanup - method is not called, the file is left behind on the filesystem to assist debugging. 
- """ - - config: SimpleDiscordConfig - channel: str - days: Optional[int] - token: str - - # NOTE(crag): probably doesn't matter, but intentionally not defining tmp_download_file - # __post_init__ for multiprocessing simplicity (no Path objects in initially - # instantiated object) - def _tmp_download_file(self): - channel_file = self.channel + ".txt" - return Path(self.standard_config.download_dir) / channel_file - - @property - def _output_filename(self): - output_file = self.channel + ".json" - return Path(self.standard_config.output_dir) / output_file - - def _create_full_tmp_dir_path(self): - self._tmp_download_file().parent.mkdir(parents=True, exist_ok=True) - - @BaseIngestDoc.skip_if_file_exists - @requires_dependencies(dependencies=["discord"], extras="discord") - def get_file(self): - """Actually fetches the data from discord and stores it locally.""" - - import discord - from discord.ext import commands - - self._create_full_tmp_dir_path() - if self.config.verbose: - logger.debug(f"fetching {self} - PID: {os.getpid()}") - messages: List[discord.Message] = [] - intents = discord.Intents.default() - intents.message_content = True - bot = commands.Bot(command_prefix=">", intents=intents) - - @bot.event - async def on_ready(): - try: - after_date = None - if self.days: - after_date = dt.datetime.utcnow() - dt.timedelta(days=self.days) - - channel = bot.get_channel(int(self.channel)) - async for msg in channel.history(after=after_date): # type: ignore - messages.append(msg) - - await bot.close() - except Exception as e: - logger.error(f"Error fetching messages: {e}") - await bot.close() - - bot.run(self.token) - - with open(self._tmp_download_file(), "w") as f: - for m in messages: - f.write(m.content + "\n") - - @property - def filename(self): - """The filename of the file created from a discord channel""" - return self._tmp_download_file() - - -class DiscordConnector(ConnectorCleanupMixin, BaseConnector): - """Objects of this class support fetching document(s) from""" - - config: SimpleDiscordConfig - - def __init__( - self, - standard_config: StandardConnectorConfig, - config: SimpleDiscordConfig, - ): - super().__init__(standard_config, config) - - def initialize(self): - """Verify that can get metadata for an object, validates connections info.""" - os.mkdir(self.standard_config.download_dir) - - def get_ingest_docs(self): - return [ - DiscordIngestDoc( - self.standard_config, - self.config, - channel, - self.config.days, - self.config.token, - ) - for channel in self.config.channels - ] diff --git a/src/unstructured/ingest/connector/dropbox.py b/src/unstructured/ingest/connector/dropbox.py deleted file mode 100644 index 385e05b..0000000 --- a/src/unstructured/ingest/connector/dropbox.py +++ /dev/null @@ -1,124 +0,0 @@ -""" -Dropbox Connector -The Dropbox Connector presents a couple abnormal situations. -1) They don't have an unexpiring token -2) They require a forward slash `/` in front of the remote_file_path. This presents -some real problems creating paths. When appending a path that begins with a -forward slash to any path, whether using the / shorthand or joinpath, causes the -starting path to disappear. So the `/` needs to be stripped off. 
-3) To list and get files from the Dropbox root directory you need "", " ", or " /". -""" -import re -from dataclasses import dataclass -from pathlib import Path -from typing import Type - -from unstructured.ingest.connector.fsspec import ( - FsspecConnector, - FsspecIngestDoc, - SimpleFsspecConfig, -) -from unstructured.ingest.interfaces import StandardConnectorConfig -from unstructured.utils import requires_dependencies - - -class MissingFolderError(Exception): - """There is no folder by that name. For root try `dropbox:// /`""" - - -@dataclass -class SimpleDropboxConfig(SimpleFsspecConfig): - pass - - -class DropboxIngestDoc(FsspecIngestDoc): - @requires_dependencies(["dropboxdrivefs", "fsspec"]) - def get_file(self): - super().get_file() - - @property - def _output_filename(self): - # Dropbox requires a forward slash at the front of the folder path. This - # creates some complications in path joining so a custom path is created here. - # Dropbox uses an empty string `""`, a space `" "`, or `" /"` to list the root - if self.config.dir_path == " ": - return Path(self.standard_config.output_dir) / re.sub( - "^/", - "", - f"{self.remote_file_path}.json", - ) - else: - return ( - Path(self.standard_config.output_dir) - / f"{self.remote_file_path.replace(f'/{self.config.dir_path}/', '')}.json" - ) - - def _tmp_download_file(self): - # Dropbox requires a forward slash at the front of the folder path. This - # creates some complications in path joining so a custom path is created here. - # Dropbox uses an empty string `""`, a space `" "`, or `" /"` to list the root - if self.config.dir_path == " ": - return Path(self.standard_config.download_dir) / re.sub( - "^/", - "", - self.remote_file_path, - ) - else: - return Path(self.standard_config.download_dir) / self.remote_file_path.replace( - f"/{self.config.dir_path}/", - "", - ) - - -@requires_dependencies(["dropboxdrivefs", "fsspec"]) -class DropboxConnector(FsspecConnector): - ingest_doc_cls: Type[DropboxIngestDoc] = DropboxIngestDoc - - def __init__( - self, - config: SimpleDropboxConfig, - standard_config: StandardConnectorConfig, - ) -> None: - super().__init__(standard_config, config) - - def initialize(self): - # Dropbox requires a forward slash at the front of the folder path. This - # creates some complications in path joining so a custom path is created here. - ls_output = self.fs.ls(f"/{self.config.path_without_protocol}") - if ls_output and len(ls_output) >= 1: - return - elif ls_output is not None: - raise ValueError( - f"No objects found in {self.config.path}.", - ) - else: - raise MissingFolderError( - "There is no folder by that name. For root try `dropbox:// /`", - ) - - def _list_files(self): - # Dropbox requires a forward slash at the front of the folder path. This - # creates some complications in path joining so a custom path is created here.
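- # Non-recursive: fs.ls the folder and keep entries with a non-zero size - # (directories show up as 0-byte entries); recursive: fs.find walks the tree.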
- if not self.config.recursive: - # fs.ls does not walk directories - # directories that are listed in cloud storage can cause problems because they are seen - # as 0-byte files - return [ - x.get("name") - for x in self.fs.ls( - f"/{self.config.path_without_protocol}", - detail=True, - ) - if x.get("size") - ] - else: - # fs.find will recursively walk directories - # "size" is a common key for all the cloud protocols with fs - return [ - k - for k, v in self.fs.find( - f"/{self.config.path_without_protocol}", - detail=True, - ).items() - if v.get("size") - ] diff --git a/src/unstructured/ingest/connector/elasticsearch.py b/src/unstructured/ingest/connector/elasticsearch.py deleted file mode 100644 index a48c7d4..0000000 --- a/src/unstructured/ingest/connector/elasticsearch.py +++ /dev/null @@ -1,169 +0,0 @@ -import hashlib -import json -import os -from dataclasses import dataclass -from pathlib import Path -from typing import Optional - -import jq -from elasticsearch import Elasticsearch -from elasticsearch.helpers import scan - -from unstructured.ingest.interfaces import ( - BaseConnector, - BaseConnectorConfig, - BaseIngestDoc, - ConnectorCleanupMixin, - IngestDocCleanupMixin, - StandardConnectorConfig, -) -from unstructured.ingest.logger import logger -from unstructured.utils import requires_dependencies - - -@dataclass -class SimpleElasticsearchConfig(BaseConnectorConfig): - """Connector config where: - url is the URL of the Elasticsearch server, - index_name is the name of the index to fetch documents from, - - and jq_query is an optional query to extract specific fields from each document, - rather than getting and processing all fields in a document. - """ - - url: str - index_name: str - jq_query: Optional[str] - - -@dataclass -class ElasticsearchFileMeta: - """Metadata specifying: - the name of the Elasticsearch index being read from, - and the id of the document being fetched. - """ - - index_name: str - document_id: str - - -@dataclass -class ElasticsearchIngestDoc(IngestDocCleanupMixin, BaseIngestDoc): - """Class encapsulating fetching a doc and writing processed results (but not - doing the processing!). - - Current implementation creates a python Elasticsearch client to fetch each doc, - rather than creating a client for each thread. - """ - - config: SimpleElasticsearchConfig - file_meta: ElasticsearchFileMeta - - # TODO: remove one of filename or _tmp_download_file, using a wrapper - @property - def filename(self): - return ( - Path(self.standard_config.download_dir) - / self.file_meta.index_name - / f"{self.file_meta.document_id}.txt" - ).resolve() - - @property - def _output_filename(self): - """Create the output filename from the document id combined with a hash of the query, - to uniquely identify the output file.""" - # Generate SHA256 hash and take the first 8 characters - query_hash = hashlib.sha256((self.config.jq_query or "").encode()).hexdigest()[:8] - output_file = f"{self.file_meta.document_id}-{query_hash}.json" - return Path(self.standard_config.output_dir) / self.config.index_name / output_file - - # TODO: change test fixtures such that examples with - # nested dictionaries are included in test documents - def _flatten_values(self, value, seperator="\n", no_value_str=""): - """Flattens list or dict objects. Joins each value or item with - the separator character. Keys are not included in the joined string.
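- - For example, _flatten_values(["x", {"y": 1}]) returns "x\n1" with the default separator.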
- When a dict value or a list item is None, no_value_str is used to - represent that value / item.""" - if value is None: - return no_value_str - - if isinstance(value, list): - flattened_values = [self._flatten_values(item, seperator) for item in value] - return seperator.join(flattened_values) - - elif isinstance(value, dict): - flattened_values = [self._flatten_values(item, seperator) for item in value.values()] - return seperator.join(flattened_values) - - else: - return str(value) - - def _concatenate_dict_fields(self, dictionary, seperator="\n"): - """Concatenates all values for each key in a dictionary in a nested manner. - Used to parse a python dictionary to an aggregated string""" - values = [self._flatten_values(value, seperator) for value in dictionary.values()] - concatenated_values = seperator.join(values) - return concatenated_values - - @requires_dependencies(["elasticsearch"]) - @BaseIngestDoc.skip_if_file_exists - def get_file(self): - logger.debug(f"Fetching {self} - PID: {os.getpid()}") - # TODO: instead of having a separate client for each doc, - # have a separate client for each process - es = Elasticsearch(self.config.url) - document_dict = es.get( - index=self.config.index_name, - id=self.file_meta.document_id, - ).body["_source"] - if self.config.jq_query: - document_dict = json.loads(jq.compile(self.config.jq_query).input(document_dict).text()) - self.document = self._concatenate_dict_fields(document_dict) - self.filename.parent.mkdir(parents=True, exist_ok=True) - with open(self.filename, "w", encoding="utf8") as f: - f.write(self.document) - - -@requires_dependencies(["elasticsearch"]) -@dataclass -class ElasticsearchConnector(ConnectorCleanupMixin, BaseConnector): - """Fetches particular fields from all documents in a given elasticsearch cluster and index""" - - config: SimpleElasticsearchConfig - - def __init__( - self, - standard_config: StandardConnectorConfig, - config: SimpleElasticsearchConfig, - ): - super().__init__(standard_config, config) - - def initialize(self): - self.es = Elasticsearch(self.config.url) - self.scan_query: dict = {"query": {"match_all": {}}} - self.search_query: dict = {"match_all": {}} - self.es.search(index=self.config.index_name, query=self.search_query, size=1) - - @requires_dependencies(["elasticsearch"]) - def _get_doc_ids(self): - """Fetches all document ids in an index""" - hits = scan( - self.es, - query=self.scan_query, - scroll="1m", - index=self.config.index_name, - ) - - return [hit["_id"] for hit in hits] - - def get_ingest_docs(self): - """Fetches all documents in an index, using ids that are fetched with _get_doc_ids""" - ids = self._get_doc_ids() - return [ - ElasticsearchIngestDoc( - self.standard_config, - self.config, - ElasticsearchFileMeta(self.config.index_name, id), - ) - for id in ids - ] diff --git a/src/unstructured/ingest/connector/fsspec.py b/src/unstructured/ingest/connector/fsspec.py deleted file mode 100644 index 63df58b..0000000 --- a/src/unstructured/ingest/connector/fsspec.py +++ /dev/null @@ -1,175 +0,0 @@ -import os -import re -from dataclasses import dataclass, field -from pathlib import Path -from typing import Type - -from unstructured.ingest.interfaces import ( - BaseConnector, - BaseConnectorConfig, - BaseIngestDoc, - ConnectorCleanupMixin, - IngestDocCleanupMixin, - StandardConnectorConfig, -) -from unstructured.ingest.logger import logger - -SUPPORTED_REMOTE_FSSPEC_PROTOCOLS = [ - "s3", - "s3a", - "abfs", - "az", - "gs", - "gcs", - "box", - "dropbox", -] - - -@dataclass -class 
SimpleFsspecConfig(BaseConnectorConfig): - # fsspec specific options - path: str - recursive: bool - access_kwargs: dict = field(default_factory=dict) - protocol: str = field(init=False) - path_without_protocol: str = field(init=False) - dir_path: str = field(init=False) - file_path: str = field(init=False) - - def __post_init__(self): - self.protocol, self.path_without_protocol = self.path.split("://") - if self.protocol not in SUPPORTED_REMOTE_FSSPEC_PROTOCOLS: - raise ValueError( - f"Protocol {self.protocol} not supported yet, only " - f"{SUPPORTED_REMOTE_FSSPEC_PROTOCOLS} are supported.", - ) - - # dropbox root is an empty string - match = re.match(rf"{self.protocol}://([\s])/", self.path) - if match and self.protocol == "dropbox": - self.dir_path = " " - self.file_path = "" - return - - # just a path with no trailing prefix - match = re.match(rf"{self.protocol}://([^/\s]+?)(/*)$", self.path) - if match: - self.dir_path = match.group(1) - self.file_path = "" - return - - # valid path with a dir and/or file - match = re.match(rf"{self.protocol}://([^/\s]+?)/([^\s]*)", self.path) - if not match: - raise ValueError( - f"Invalid path {self.path}. Expected <protocol>://<dir-path>/<file-path>.", - ) - self.dir_path = match.group(1) - self.file_path = match.group(2) or "" - - -@dataclass -class FsspecIngestDoc(IngestDocCleanupMixin, BaseIngestDoc): - """Class encapsulating fetching a doc and writing processed results (but not - doing the processing!). - - Also includes a cleanup method. When things go wrong and the cleanup - method is not called, the file is left behind on the filesystem to assist debugging. - """ - - config: SimpleFsspecConfig - remote_file_path: str - - def _tmp_download_file(self): - return Path(self.standard_config.download_dir) / self.remote_file_path.replace( - f"{self.config.dir_path}/", - "", - ) - - @property - def _output_filename(self): - return ( - Path(self.standard_config.output_dir) - / f"{self.remote_file_path.replace(f'{self.config.dir_path}/', '')}.json" - ) - - def _create_full_tmp_dir_path(self): - """Includes "directories" in the object path""" - self._tmp_download_file().parent.mkdir(parents=True, exist_ok=True) - - @BaseIngestDoc.skip_if_file_exists - def get_file(self): - """Fetches the file from the current filesystem and stores it locally.""" - from fsspec import AbstractFileSystem, get_filesystem_class - - self._create_full_tmp_dir_path() - fs: AbstractFileSystem = get_filesystem_class(self.config.protocol)( - **self.config.access_kwargs, - ) - logger.debug(f"Fetching {self} - PID: {os.getpid()}") - fs.get(rpath=self.remote_file_path, lpath=self._tmp_download_file().as_posix()) - - @property - def filename(self): - """The filename of the file after downloading from cloud""" - return self._tmp_download_file() - - -class FsspecConnector(ConnectorCleanupMixin, BaseConnector): - """Objects of this class support fetching document(s) from fsspec-backed remote storage.""" - - config: SimpleFsspecConfig - ingest_doc_cls: Type[FsspecIngestDoc] = FsspecIngestDoc - - def __init__( - self, - standard_config: StandardConnectorConfig, - config: SimpleFsspecConfig, - ): - from fsspec import AbstractFileSystem, get_filesystem_class - - super().__init__(standard_config, config) - self.fs: AbstractFileSystem = get_filesystem_class(self.config.protocol)( - **self.config.access_kwargs, - ) - - def initialize(self): - """Verify that we can get metadata for an object; validates connection info.""" - ls_output = self.fs.ls(self.config.path_without_protocol) - if len(ls_output) < 1: - raise ValueError( - f"No objects found in
{self.config.path}.", - ) - - def _list_files(self): - if not self.config.recursive: - # fs.ls does not walk directories - # directories that are listed in cloud storage can cause problems - # because they are seen as 0 byte files - return [ - x.get("name") - for x in self.fs.ls(self.config.path_without_protocol, detail=True) - if x.get("size") > 0 - ] - else: - # fs.find will recursively walk directories - # "size" is a common key for all the cloud protocols with fs - return [ - k - for k, v in self.fs.find( - self.config.path_without_protocol, - detail=True, - ).items() - if v.get("size") > 0 - ] - - def get_ingest_docs(self): - return [ - self.ingest_doc_cls( - standard_config=self.standard_config, - config=self.config, - remote_file_path=file, - ) - for file in self._list_files() - ] diff --git a/src/unstructured/ingest/connector/gcs.py b/src/unstructured/ingest/connector/gcs.py deleted file mode 100644 index 256934b..0000000 --- a/src/unstructured/ingest/connector/gcs.py +++ /dev/null @@ -1,33 +0,0 @@ -from dataclasses import dataclass -from typing import Type - -from unstructured.ingest.connector.fsspec import ( - FsspecConnector, - FsspecIngestDoc, - SimpleFsspecConfig, -) -from unstructured.ingest.interfaces import StandardConnectorConfig -from unstructured.utils import requires_dependencies - - -@dataclass -class SimpleGcsConfig(SimpleFsspecConfig): - pass - - -class GcsIngestDoc(FsspecIngestDoc): - @requires_dependencies(["gcsfs", "fsspec"], extras="gcs") - def get_file(self): - super().get_file() - - -@requires_dependencies(["gcsfs", "fsspec"], extras="gcs") -class GcsConnector(FsspecConnector): - ingest_doc_cls: Type[GcsIngestDoc] = GcsIngestDoc - - def __init__( - self, - config: SimpleGcsConfig, - standard_config: StandardConnectorConfig, - ) -> None: - super().__init__(standard_config, config) diff --git a/src/unstructured/ingest/connector/git.py b/src/unstructured/ingest/connector/git.py deleted file mode 100644 index 85268a9..0000000 --- a/src/unstructured/ingest/connector/git.py +++ /dev/null @@ -1,94 +0,0 @@ -import fnmatch -import os -from dataclasses import dataclass, field -from pathlib import Path -from typing import Optional - -from unstructured.ingest.interfaces import ( - BaseConnector, - BaseConnectorConfig, - BaseIngestDoc, - ConnectorCleanupMixin, - IngestDocCleanupMixin, -) -from unstructured.ingest.logger import logger - - -@dataclass -class SimpleGitConfig(BaseConnectorConfig): - url: str - access_token: Optional[str] - branch: Optional[str] - file_glob: Optional[str] - repo_path: str = field(init=False, repr=False) - - -@dataclass -class GitIngestDoc(IngestDocCleanupMixin, BaseIngestDoc): - config: SimpleGitConfig = field(repr=False) - path: str - - @property - def filename(self): - return (Path(self.standard_config.download_dir) / self.path).resolve() - - @property - def _output_filename(self): - return Path(self.standard_config.output_dir) / f"{self.path}.json" - - def _create_full_tmp_dir_path(self): - """includes directories in in the gitlab repository""" - self.filename.parent.mkdir(parents=True, exist_ok=True) - - @BaseIngestDoc.skip_if_file_exists - def get_file(self): - """Fetches the "remote" doc and stores it locally on the filesystem.""" - self._create_full_tmp_dir_path() - logger.debug(f"Fetching {self} - PID: {os.getpid()}") - self._fetch_and_write() - - def _fetch_and_write(self) -> None: - raise NotImplementedError() - - -@dataclass -class GitConnector(ConnectorCleanupMixin, BaseConnector): - config: SimpleGitConfig - - def initialize(self): 
- pass - - def is_file_type_supported(self, path: str) -> bool: - # Workaround to ensure that auto.partition isn't fed with .yaml, .py, etc. files - # TODO: What to do with no filenames? e.g. LICENSE, Makefile, etc. - supported = path.endswith( - ( - ".md", - ".txt", - ".pdf", - ".doc", - ".docx", - ".eml", - ".html", - ".png", - ".jpg", - ".ppt", - ".pptx", - ".xml", - ), - ) - if not supported: - logger.debug( - f"The file {path!r} is discarded as it does not contain a supported filetype.", - ) - return supported - - def does_path_match_glob(self, path: str) -> bool: - if not self.config.file_glob: - return True - patterns = self.config.file_glob.split(",") - for pattern in patterns: - if fnmatch.filter([path], pattern): - return True - logger.debug(f"The file {path!r} is discarded as it does not match any given glob.") - return False diff --git a/src/unstructured/ingest/connector/github.py b/src/unstructured/ingest/connector/github.py deleted file mode 100644 index bf9754d..0000000 --- a/src/unstructured/ingest/connector/github.py +++ /dev/null @@ -1,87 +0,0 @@ -from dataclasses import dataclass -from typing import TYPE_CHECKING -from urllib.parse import urlparse - -import requests - -from unstructured.ingest.connector.git import ( - GitConnector, - GitIngestDoc, - SimpleGitConfig, -) -from unstructured.ingest.logger import logger -from unstructured.utils import requires_dependencies - -if TYPE_CHECKING: - from github.Repository import Repository - - -@dataclass -class SimpleGitHubConfig(SimpleGitConfig): - def __post_init__(self): - parsed_gh_url = urlparse(self.url) - path_fragments = [fragment for fragment in parsed_gh_url.path.split("/") if fragment] - - # If a scheme and netloc are provided, ensure they are correct - # Additionally, ensure that the path contains two fragments - if ( - (parsed_gh_url.scheme and parsed_gh_url.scheme != "https") - or (parsed_gh_url.netloc and parsed_gh_url.netloc != "github.com") - or len(path_fragments) != 2 - ): - raise ValueError( - 'Please provide a valid URL, e.g. "https://github.com/Unstructured-IO/unstructured"' - ' or a repository owner/name pair, e.g. "Unstructured-IO/unstructured".', - ) - - # If there's no issues, store the core repository info - self.repo_path = parsed_gh_url.path - - -@dataclass -class GitHubIngestDoc(GitIngestDoc): - repo: "Repository" - - def _fetch_and_write(self) -> None: - content_file = self.repo.get_contents(self.path) - contents = b"" - if ( - not content_file.content # type: ignore - and content_file.encoding == "none" # type: ignore - and content_file.size # type: ignore - ): - logger.info("File too large for the GitHub API, using direct download link instead.") - response = requests.get(content_file.download_url) # type: ignore - if response.status_code != 200: - logger.info("Direct download link has failed... Skipping this file.") - else: - contents = response.content - else: - contents = content_file.decoded_content # type: ignore - - with open(self.filename, "wb") as f: - f.write(contents) - - -@requires_dependencies(["github"], extras="github") -@dataclass -class GitHubConnector(GitConnector): - def __post_init__(self) -> None: - from github import Github - - self.github = Github(self.config.access_token) - - def get_ingest_docs(self): - repo = self.github.get_repo(self.config.repo_path) - - # Load the Git tree with all files, and then create Ingest docs - # for all blobs, i.e. 
all files, ignoring directories - sha = self.config.branch or repo.default_branch - git_tree = repo.get_git_tree(sha, recursive=True) - return [ - GitHubIngestDoc(self.standard_config, self.config, element.path, repo) - for element in git_tree.tree - if element.type == "blob" - and self.is_file_type_supported(element.path) - and (not self.config.file_glob or self.does_path_match_glob(element.path)) - ] diff --git a/src/unstructured/ingest/connector/gitlab.py b/src/unstructured/ingest/connector/gitlab.py deleted file mode 100644 index 40bb361..0000000 --- a/src/unstructured/ingest/connector/gitlab.py +++ /dev/null @@ -1,71 +0,0 @@ -from dataclasses import dataclass -from typing import TYPE_CHECKING -from urllib.parse import urlparse - -from unstructured.ingest.connector.git import ( - GitConnector, - GitIngestDoc, - SimpleGitConfig, -) -from unstructured.utils import requires_dependencies - -if TYPE_CHECKING: - from gitlab.v4.objects.projects import Project - - -@dataclass -class SimpleGitLabConfig(SimpleGitConfig): - def __post_init__(self): - parsed_gh_url = urlparse(self.url) - - # If no scheme or netloc are provided, use the default gitlab.com - if not parsed_gh_url.scheme and not parsed_gh_url.netloc: - self.url = "https://gitlab.com" - else: - self.url = f"{parsed_gh_url.scheme}://{parsed_gh_url.netloc}" - self.repo_path = parsed_gh_url.path - while self.repo_path.startswith("/"): - self.repo_path = self.repo_path[1:] - - -@dataclass -class GitLabIngestDoc(GitIngestDoc): - project: "Project" - - def _fetch_and_write(self) -> None: - content_file = self.project.files.get( - self.path, - ref=self.config.branch or self.project.default_branch, - ) - contents = content_file.decode() - - with open(self.filename, "wb") as f: - f.write(contents) - - -@requires_dependencies(["gitlab"], extras="gitlab") -@dataclass -class GitLabConnector(GitConnector): - def __post_init__(self) -> None: - from gitlab import Gitlab - - self.gitlab = Gitlab(self.config.url, private_token=self.config.access_token) - - def get_ingest_docs(self): - # Load the Git tree with all files, and then create Ingest docs - # for all blobs, i.e. 
all files, ignoring directories - project = self.gitlab.projects.get(self.config.repo_path) - ref = self.config.branch or project.default_branch - git_tree = project.repository_tree( - ref=ref, - recursive=True, - iterator=True, - all=True, - ) - return [ - GitLabIngestDoc(self.standard_config, self.config, element["path"], project) - for element in git_tree - if element["type"] == "blob" - and self.is_file_type_supported(element["path"]) - and (not self.config.file_glob or self.does_path_match_glob(element["path"])) - ] diff --git a/src/unstructured/ingest/connector/google_drive.py b/src/unstructured/ingest/connector/google_drive.py deleted file mode 100644 index 7053a03..0000000 --- a/src/unstructured/ingest/connector/google_drive.py +++ /dev/null @@ -1,262 +0,0 @@ -import io -import json -import os -from dataclasses import dataclass -from mimetypes import guess_extension -from pathlib import Path -from typing import TYPE_CHECKING, Dict, Optional - -from unstructured.file_utils.filetype import EXT_TO_FILETYPE -from unstructured.file_utils.google_filetype import GOOGLE_DRIVE_EXPORT_TYPES -from unstructured.ingest.interfaces import ( - BaseConnector, - BaseConnectorConfig, - BaseIngestDoc, - BaseSessionHandle, - ConfigSessionHandleMixin, - ConnectorCleanupMixin, - IngestDocCleanupMixin, - IngestDocSessionHandleMixin, - StandardConnectorConfig, -) -from unstructured.ingest.logger import logger -from unstructured.utils import requires_dependencies - -if TYPE_CHECKING: - from googleapiclient.discovery import Resource as GoogleAPIResource - -FILE_FORMAT = "{id}-{name}{ext}" -DIRECTORY_FORMAT = "{id}-{name}" - - -@dataclass -class GoogleDriveSessionHandle(BaseSessionHandle): - service: "GoogleAPIResource" - - -@requires_dependencies(["googleapiclient"], extras="google-drive") -def create_service_account_object(key_path, id=None): - """ - Creates a service object for interacting with Google Drive. - - Providing a drive id enforces a key validation process. - - Args: - key_path: Path to Google Drive service account json file. - id: ID of a file on Google Drive. File has to be either publicly accessible or accessible - to the service account. - - Returns: - Service account object - """ - from google.auth import default, exceptions - from googleapiclient.discovery import build - from googleapiclient.errors import HttpError - - try: - os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = key_path - creds, _ = default() - service = build("drive", "v3", credentials=creds) - - if id: - service.files().list( - spaces="drive", - fields="files(id)", - pageToken=None, - corpora="user", - q=f"'{id}' in parents", - ).execute() - - except HttpError as exc: - raise ValueError(f"{exc.reason}") - except exceptions.DefaultCredentialsError: - raise ValueError("The provided API key is invalid.") - - return service - - -@dataclass -class SimpleGoogleDriveConfig(ConfigSessionHandleMixin, BaseConnectorConfig): - """Connector config where drive_id is the id of the document to process or - the folder to process all documents from.""" - - # Google Drive Specific Options - drive_id: str - service_account_key: str - extension: Optional[str] - recursive: bool = False - - def __post_init__(self): - if self.extension and self.extension not in EXT_TO_FILETYPE: - raise ValueError( - f"Extension not supported. 
" - f"Value MUST be one of {', '.join([k for k in EXT_TO_FILETYPE if k is not None])}.", - ) - - def create_session_handle( - self, - ) -> GoogleDriveSessionHandle: - service = create_service_account_object(self.service_account_key) - return GoogleDriveSessionHandle(service=service) - - -@dataclass -class GoogleDriveIngestDoc(IngestDocSessionHandleMixin, IngestDocCleanupMixin, BaseIngestDoc): - config: SimpleGoogleDriveConfig - file_meta: Dict - - @property - def filename(self): - return Path(self.file_meta.get("download_filepath")).resolve() # type: ignore - - @property - def _output_filename(self): - return Path(f"{self.file_meta.get('output_filepath')}.json").resolve() - - @BaseIngestDoc.skip_if_file_exists - @requires_dependencies(["googleapiclient"], extras="google-drive") - def get_file(self): - from googleapiclient.errors import HttpError - from googleapiclient.http import MediaIoBaseDownload - - if self.file_meta.get("mimeType", "").startswith("application/vnd.google-apps"): - export_mime = GOOGLE_DRIVE_EXPORT_TYPES.get( - self.file_meta.get("mimeType"), # type: ignore - ) - if not export_mime: - logger.info( - f"File not supported. Name: {self.file_meta.get('name')} " - f"ID: {self.file_meta.get('id')} " - f"MimeType: {self.file_meta.get('mimeType')}", - ) - return - - request = self.session_handle.service.files().export_media( - fileId=self.file_meta.get("id"), - mimeType=export_mime, - ) - else: - request = self.session_handle.service.files().get_media(fileId=self.file_meta.get("id")) - file = io.BytesIO() - downloader = MediaIoBaseDownload(file, request) - downloaded = False - try: - while downloaded is False: - status, downloaded = downloader.next_chunk() - except HttpError: - pass - - saved = False - if downloaded and file: - dir_ = self.file_meta.get("download_dir") - if dir_: - if not dir_.is_dir(): - logger.debug(f"Creating directory: {self.file_meta.get('download_dir')}") - - if dir_: - dir_.mkdir(parents=True, exist_ok=True) - - with open(self.filename, "wb") as handler: - handler.write(file.getbuffer()) - saved = True - logger.debug(f"File downloaded: {self.filename}.") - - if not saved: - logger.error(f"Error while downloading and saving file: {self.filename}.") - - def write_result(self): - """Write the structured json result for this doc. 
result must be json serializable.""" - if self.standard_config.download_only: - return - self._output_filename.parent.mkdir(parents=True, exist_ok=True) - with open(self._output_filename, "w") as output_f: - output_f.write(json.dumps(self.isd_elems_no_filename, ensure_ascii=False, indent=2)) - logger.info(f"Wrote {self._output_filename}") - - -class GoogleDriveConnector(ConnectorCleanupMixin, BaseConnector): - """Objects of this class support fetching documents from Google Drive""" - - config: SimpleGoogleDriveConfig - - def __init__(self, standard_config: StandardConnectorConfig, config: SimpleGoogleDriveConfig): - super().__init__(standard_config, config) - - def _list_objects(self, drive_id, recursive=False): - files = [] - service = self.config.create_session_handle().service - - def traverse(drive_id, download_dir, output_dir, recursive=False): - page_token = None - while True: - response = ( - service.files() - .list( - spaces="drive", - fields="nextPageToken, files(id, name, mimeType)", - pageToken=page_token, - corpora="user", - q=f"'{drive_id}' in parents", - ) - .execute() - ) - - for meta in response.get("files", []): - if meta.get("mimeType") == "application/vnd.google-apps.folder": - dir_ = DIRECTORY_FORMAT.format(name=meta.get("name"), id=meta.get("id")) - if recursive: - download_sub_dir = (download_dir / dir_).resolve() - output_sub_dir = (output_dir / dir_).resolve() - traverse(meta.get("id"), download_sub_dir, output_sub_dir, True) - else: - ext = "" - if not Path(meta.get("name")).suffixes: - guess = guess_extension(meta.get("mimeType")) - ext = guess if guess else ext - - if meta.get("mimeType", "").startswith("application/vnd.google-apps"): - export_mime = GOOGLE_DRIVE_EXPORT_TYPES.get(meta.get("mimeType")) - if not export_mime: - logger.info( - f"File {meta.get('name')} has an " - f"unsupported MimeType {meta.get('mimeType')}", - ) - continue - - if not ext: - guess = guess_extension(export_mime) - ext = guess if guess else ext - - # TODO (Habeeb): Consider filtering at the query level. 
- if self.config.extension and self.config.extension != ext: # noqa: SIM102 - logger.debug( - f"File {meta.get('name')} does not match " - f"the file type {self.config.extension}", - ) - continue - - name = FILE_FORMAT.format(name=meta.get("name"), id=meta.get("id"), ext=ext) - meta["download_dir"] = download_dir - meta["download_filepath"] = (download_dir / name).resolve() - meta["output_dir"] = output_dir - meta["output_filepath"] = (output_dir / name).resolve() - files.append(meta) - - page_token = response.get("nextPageToken", None) - if page_token is None: - break - - traverse( - drive_id, - Path(self.standard_config.download_dir), - Path(self.standard_config.output_dir), - recursive, - ) - return files - - def initialize(self): - pass - - def get_ingest_docs(self): - files = self._list_objects(self.config.drive_id, self.config.recursive) - return [GoogleDriveIngestDoc(self.standard_config, self.config, file) for file in files] diff --git a/src/unstructured/ingest/connector/local.py b/src/unstructured/ingest/connector/local.py deleted file mode 100644 index 6d4901e..0000000 --- a/src/unstructured/ingest/connector/local.py +++ /dev/null @@ -1,116 +0,0 @@ -import fnmatch -import glob -import os -from dataclasses import dataclass -from pathlib import Path -from typing import Optional, Type - -from unstructured.ingest.interfaces import ( - BaseConnector, - BaseConnectorConfig, - BaseIngestDoc, - StandardConnectorConfig, -) -from unstructured.ingest.logger import logger - - -@dataclass -class SimpleLocalConfig(BaseConnectorConfig): - # Local specific options - input_path: str - recursive: bool = False - file_glob: Optional[str] = None - - def __post_init__(self): - if os.path.isfile(self.input_path): - self.input_path_is_file = True - else: - self.input_path_is_file = False - - -@dataclass -class LocalIngestDoc(BaseIngestDoc): - """Class encapsulating fetching a doc and writing processed results (but not - doing the processing!). - """ - - config: SimpleLocalConfig - path: str - - @property - def filename(self): - """The filename of the local file to be processed""" - return Path(self.path) - - def cleanup_file(self): - """Not applicable to local file system""" - pass - - def get_file(self): - """Not applicable to local file system""" - pass - - @property - def _output_filename(self) -> Path: - """Returns output filename for the doc - If input path argument is a file itself, it returns the filename of the doc. - If input path argument is a folder, it returns the relative path of the doc. 
- """ - input_path = Path(self.config.input_path) - basename = ( - f"{Path(self.path).name}.json" - if input_path.is_file() - else f"{Path(self.path).relative_to(input_path)}.json" - ) - return Path(self.standard_config.output_dir) / basename - - -class LocalConnector(BaseConnector): - """Objects of this class support fetching document(s) from local file system""" - - config: SimpleLocalConfig - ingest_doc_cls: Type[LocalIngestDoc] = LocalIngestDoc - - def __init__( - self, - standard_config: StandardConnectorConfig, - config: SimpleLocalConfig, - ): - super().__init__(standard_config, config) - - def cleanup(self, cur_dir=None): - """Not applicable to local file system""" - pass - - def initialize(self): - """Not applicable to local file system""" - pass - - def _list_files(self): - if self.config.input_path_is_file: - return glob.glob(f"{self.config.input_path}") - elif self.config.recursive: - return glob.glob(f"{self.config.input_path}/**", recursive=self.config.recursive) - else: - return glob.glob(f"{self.config.input_path}/*") - - def does_path_match_glob(self, path: str) -> bool: - if self.config.file_glob is None: - return True - patterns = self.config.file_glob.split(",") - for pattern in patterns: - if fnmatch.filter([path], pattern): - return True - logger.debug(f"The file {path!r} is discarded as it does not match any given glob.") - return False - - def get_ingest_docs(self): - return [ - self.ingest_doc_cls( - self.standard_config, - self.config, - file, - ) - for file in self._list_files() - if os.path.isfile(file) and self.does_path_match_glob(file) - ] diff --git a/src/unstructured/ingest/connector/notion/__init__.py b/src/unstructured/ingest/connector/notion/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/unstructured/ingest/connector/notion/client.py b/src/unstructured/ingest/connector/notion/client.py deleted file mode 100644 index 1b8fcba..0000000 --- a/src/unstructured/ingest/connector/notion/client.py +++ /dev/null @@ -1,90 +0,0 @@ -from typing import Any, Generator, List, Tuple - -from notion_client import Client as NotionClient -from notion_client.api_endpoints import ( - BlocksChildrenEndpoint as NotionBlocksChildrenEndpoint, -) -from notion_client.api_endpoints import BlocksEndpoint as NotionBlocksEndpoint -from notion_client.api_endpoints import DatabasesEndpoint as NotionDatabasesEndpoint -from notion_client.api_endpoints import PagesEndpoint as NotionPagesEndpoint - -from unstructured.ingest.connector.notion.types.block import Block -from unstructured.ingest.connector.notion.types.database import Database -from unstructured.ingest.connector.notion.types.database_properties import ( - map_cells, -) -from unstructured.ingest.connector.notion.types.page import Page - - -class BlocksChildrenEndpoint(NotionBlocksChildrenEndpoint): - def list(self, block_id: str, **kwargs: Any) -> Tuple[List[Block], dict]: - resp: dict = super().list(block_id=block_id, **kwargs) # type: ignore - child_blocks = [Block.from_dict(data=b) for b in resp.pop("results", [])] - return child_blocks, resp - - def iterate_list( - self, - block_id: str, - **kwargs: Any, - ) -> Generator[List[Block], None, None]: - while True: - response: dict = super().list(block_id=block_id, **kwargs) # type: ignore - child_blocks = [Block.from_dict(data=b) for b in response.pop("results", [])] - yield child_blocks - - next_cursor = response.get("next_cursor") - if not response.get("has_more") or not next_cursor: - return - - -class DatabasesEndpoint(NotionDatabasesEndpoint): - 
def retrieve(self, database_id: str, **kwargs: Any) -> Database: - resp: dict = super().retrieve(database_id=database_id, **kwargs) # type: ignore - return Database.from_dict(data=resp) - - def query(self, database_id: str, **kwargs: Any) -> Tuple[List[Page], dict]: - """Get a list of [Pages](https://developers.notion.com/reference/page) contained in the database. - - *[🔗 Endpoint documentation](https://developers.notion.com/reference/post-database-query)* - """ # noqa: E501 - resp: dict = super().query(database_id=database_id, **kwargs) # type: ignore - pages = [Page.from_dict(data=p) for p in resp.pop("results")] - for p in pages: - p.properties = map_cells(p.properties) - return pages, resp - - def iterate_query(self, database_id: str, **kwargs: Any) -> Generator[List[Page], None, None]: - while True: - response: dict = super().query(database_id=database_id, **kwargs) # type: ignore - pages = [Page.from_dict(data=p) for p in response.pop("results", [])] - for p in pages: - p.properties = map_cells(p.properties) - yield pages - - next_cursor = response.get("next_cursor") - if not response.get("has_more") or not next_cursor: - return - # Advance the cursor; otherwise the same first page would be fetched forever. - kwargs["start_cursor"] = next_cursor - - -class BlocksEndpoint(NotionBlocksEndpoint): - def __init__(self, *args: Any, **kwargs: Any) -> None: - super().__init__(*args, **kwargs) - self.children = BlocksChildrenEndpoint(*args, **kwargs) - - def retrieve(self, block_id: str, **kwargs: Any) -> Block: - resp: dict = super().retrieve(block_id=block_id, **kwargs) # type: ignore - return Block.from_dict(data=resp) - - -class PagesEndpoint(NotionPagesEndpoint): - def retrieve(self, page_id: str, **kwargs: Any) -> Page: - resp: dict = super().retrieve(page_id=page_id, **kwargs) # type: ignore - return Page.from_dict(data=resp) - - -class Client(NotionClient): - def __init__(self, *args: Any, **kwargs: Any) -> None: - super().__init__(*args, **kwargs) - self.blocks = BlocksEndpoint(self) - self.pages = PagesEndpoint(self) - self.databases = DatabasesEndpoint(self) diff --git a/src/unstructured/ingest/connector/notion/connector.py b/src/unstructured/ingest/connector/notion/connector.py deleted file mode 100644 index dcc571c..0000000 --- a/src/unstructured/ingest/connector/notion/connector.py +++ /dev/null @@ -1,415 +0,0 @@ -import logging -import os -from dataclasses import dataclass -from pathlib import Path -from typing import List, Optional -from uuid import UUID - -from unstructured.ingest.connector.notion.types.database import Database -from unstructured.ingest.connector.notion.types.page import Page -from unstructured.ingest.interfaces import ( - BaseConnector, - BaseConnectorConfig, - BaseIngestDoc, - ConnectorCleanupMixin, - IngestDocCleanupMixin, - StandardConnectorConfig, -) -from unstructured.ingest.logger import make_default_logger -from unstructured.utils import ( - requires_dependencies, -) - - -@dataclass -class SimpleNotionConfig(BaseConnectorConfig): - """Connector config to process Notion pages and databases by their ids.""" - - page_ids: List[str] - database_ids: List[str] - recursive: bool - api_key: str - verbose: bool - logger: Optional[logging.Logger] = None - - @staticmethod - def parse_ids(ids_str: str) -> List[str]: - """Parses a comma separated list of ids into a list of UUID strings.""" - return [str(UUID(x.strip())) for x in ids_str.split(",")] - - def get_logger(self) -> logging.Logger: - if self.logger: - return self.logger - return make_default_logger(logging.DEBUG if self.verbose else logging.INFO) - - -@dataclass -class NotionPageIngestDoc(IngestDocCleanupMixin, BaseIngestDoc): -
"""Class encapsulating fetching a doc and writing processed results (but not - doing the processing!). - - Also includes a cleanup method. When things go wrong and the cleanup - method is not called, the file is left behind on the filesystem to assist debugging. - """ - - page_id: str - api_key: str - config: SimpleNotionConfig - file_metadata: Optional[Page] = None - file_exists: bool = False - check_exists: bool = False - - def _tmp_download_file(self): - page_file = self.page_id + ".html" - return Path(self.standard_config.download_dir) / page_file - - @property - def _output_filename(self): - page_file = self.page_id + ".json" - return Path(self.standard_config.output_dir) / page_file - - def _create_full_tmp_dir_path(self): - self._tmp_download_file().parent.mkdir(parents=True, exist_ok=True) - - @BaseIngestDoc.skip_if_file_exists - @requires_dependencies(dependencies=["notion_client"]) - def get_file(self): - from notion_client import APIErrorCode, APIResponseError - - from unstructured.ingest.connector.notion.client import Client as NotionClient - from unstructured.ingest.connector.notion.helpers import extract_page_html - - self._create_full_tmp_dir_path() - - self.config.get_logger().debug(f"fetching page {self.page_id} - PID: {os.getpid()}") - - client = NotionClient(auth=self.api_key, logger=self.config.get_logger()) - - try: - text_extraction = extract_page_html( - client=client, - page_id=self.page_id, - logger=self.config.get_logger(), - ) - self.check_exists = True - self.file_exists = True - if html := text_extraction.html: - with open(self._tmp_download_file(), "w") as page_file: - page_file.write(html.render(pretty=True)) - - except APIResponseError as error: - if error.code == APIErrorCode.ObjectNotFound: - self.check_exists = True - self.file_exists = False - else: - self.config.get_logger().error(f"Error: {error}") - - @requires_dependencies(dependencies=["notion_client"]) - def get_file_metadata(self): - from notion_client import APIErrorCode, APIResponseError - - from unstructured.ingest.connector.notion.client import Client as NotionClient - - client = NotionClient(auth=self.api_key, logger=self.config.get_logger()) - - # The Notion block endpoint gives more hierarchical information (parent,child relationships) - # than the pages endpoint so choosing to use that one to get metadata about the page - try: - self.file_metadata = client.pages.retrieve(page_id=self.page_id) # type: ignore - self.check_exists = True - self.file_exists = True - except APIResponseError as error: - if error.code == APIErrorCode.ObjectNotFound: - self.check_exists = True - self.file_exists = False - else: - self.config.get_logger().error(f"Error: {error}") - - @property - def date_created(self) -> Optional[str]: - """The date the document was created on the source system.""" - if not self.file_metadata: - self.get_file_metadata() - - return self.file_metadata.created_time if self.file_metadata else None - - @property - def date_modified(self) -> Optional[str]: - """The date the document was last modified on the source system.""" - if not self.file_metadata: - self.get_file_metadata() - - return self.file_metadata.last_edited_time if self.file_metadata else None - - @property - def exists(self) -> Optional[bool]: - """Whether the document exists on the remote source.""" - if self.check_exists: - return self.file_exists - - self.get_file_metadata() - - return self.file_exists - - @property - def filename(self): - """The filename of the file created from a notion page""" - return 
self._tmp_download_file() - - -@dataclass -class NotionDatabaseIngestDoc(IngestDocCleanupMixin, BaseIngestDoc): - """Class encapsulating fetching a doc and writing processed results (but not - doing the processing!). - - Also includes a cleanup method. When things go wrong and the cleanup - method is not called, the file is left behind on the filesystem to assist debugging. - """ - - database_id: str - api_key: str - config: SimpleNotionConfig - file_metadata: Optional[Database] = None - file_exists: bool = False - check_exists: bool = False - - def _tmp_download_file(self): - page_file = self.database_id + ".html" - return Path(self.standard_config.download_dir) / page_file - - @property - def _output_filename(self): - page_file = self.database_id + ".json" - return Path(self.standard_config.output_dir) / page_file - - def _create_full_tmp_dir_path(self): - self._tmp_download_file().parent.mkdir(parents=True, exist_ok=True) - - @BaseIngestDoc.skip_if_file_exists - @requires_dependencies(dependencies=["notion_client"]) - def get_file(self): - from notion_client import APIErrorCode, APIResponseError - - from unstructured.ingest.connector.notion.client import Client as NotionClient - from unstructured.ingest.connector.notion.helpers import extract_database_html - - self._create_full_tmp_dir_path() - - self.config.get_logger().debug(f"fetching database {self.database_id} - PID: {os.getpid()}") - - client = NotionClient(auth=self.api_key, logger=self.config.get_logger()) - - try: - text_extraction = extract_database_html( - client=client, - database_id=self.database_id, - logger=self.config.get_logger(), - ) - self.check_exists = True - self.file_exists = True - if html := text_extraction.html: - with open(self._tmp_download_file(), "w") as page_file: - page_file.write(html.render(pretty=True)) - - except APIResponseError as error: - if error.code == APIErrorCode.ObjectNotFound: - self.check_exists = True - self.file_exists = False - else: - self.config.get_logger().error(f"Error: {error}") - - @requires_dependencies(dependencies=["notion_client"]) - def get_file_metadata(self): - from notion_client import APIErrorCode, APIResponseError - - from unstructured.ingest.connector.notion.client import Client as NotionClient - - client = NotionClient(auth=self.api_key, logger=self.config.get_logger()) - - # The databases endpoint is used to retrieve metadata about the database, - # including the created and last-edited timestamps used below. - try: - self.file_metadata = client.databases.retrieve( - database_id=self.database_id, - ) # type: ignore - self.check_exists = True - self.file_exists = True - except APIResponseError as error: - if error.code == APIErrorCode.ObjectNotFound: - self.check_exists = True - self.file_exists = False - else: - self.config.get_logger().error(f"Error: {error}") - - @property - def date_created(self) -> Optional[str]: - """The date the document was created on the source system.""" - if not self.file_metadata: - self.get_file_metadata() - - return self.file_metadata.created_time if self.file_metadata else None - - @property - def date_modified(self) -> Optional[str]: - """The date the document was last modified on the source system.""" - if not self.file_metadata: - self.get_file_metadata() - - return self.file_metadata.last_edited_time if self.file_metadata else None - - @property - def exists(self) -> Optional[bool]: - """Whether the document exists on the remote source.""" - if self.check_exists: - return self.file_exists - -
self.get_file_metadata() - - return self.file_exists - - @property - def filename(self): - """The filename of the file created from a notion database""" - return self._tmp_download_file() - - -@requires_dependencies(dependencies=["notion_client"]) -class NotionConnector(ConnectorCleanupMixin, BaseConnector): - """Objects of this class support fetching document(s) from Notion.""" - - config: SimpleNotionConfig - - def __init__( - self, - standard_config: StandardConnectorConfig, - config: SimpleNotionConfig, - ): - super().__init__( - standard_config=standard_config, - config=config, - ) - - def initialize(self): - """Verify that we can get metadata for an object; validates connection info.""" - pass - - @requires_dependencies(dependencies=["notion_client"]) - def get_child_page_content(self, page_id: str): - from unstructured.ingest.connector.notion.client import Client as NotionClient - from unstructured.ingest.connector.notion.helpers import ( - get_recursive_content_from_page, - ) - - client = NotionClient(auth=self.config.api_key, logger=self.config.get_logger()) - - child_content = get_recursive_content_from_page( - client=client, - page_id=page_id, - logger=self.config.get_logger(), - ) - return child_content - - def get_child_content(self, page_id: str): - from unstructured.ingest.connector.notion.client import Client as NotionClient - from unstructured.ingest.connector.notion.helpers import ( - get_recursive_content_from_page, - ) - - client = NotionClient(auth=self.config.api_key, logger=self.config.get_logger()) - - child_content = get_recursive_content_from_page( - client=client, - page_id=page_id, - logger=self.config.get_logger(), - ) - return child_content - - @requires_dependencies(dependencies=["notion_client"]) - def get_child_database_content(self, database_id: str): - from unstructured.ingest.connector.notion.client import Client as NotionClient - from unstructured.ingest.connector.notion.helpers import ( - get_recursive_content_from_database, - ) - - client = NotionClient(auth=self.config.api_key, logger=self.config.get_logger()) - - child_content = get_recursive_content_from_database( - client=client, - database_id=database_id, - logger=self.config.get_logger(), - ) - return child_content - - def get_ingest_docs(self): - docs: List[BaseIngestDoc] = [] - if self.config.page_ids: - docs += [ - NotionPageIngestDoc( - standard_config=self.standard_config, - config=self.config, - page_id=page_id, - api_key=self.config.api_key, - ) - for page_id in self.config.page_ids - ] - if self.config.database_ids: - docs += [ - NotionDatabaseIngestDoc( - standard_config=self.standard_config, - config=self.config, - database_id=database_id, - api_key=self.config.api_key, - ) - for database_id in self.config.database_ids - ] - if self.config.recursive: - child_pages = [] - child_databases = [] - for page_id in self.config.page_ids: - child_content = self.get_child_page_content(page_id=page_id) - child_pages.extend(child_content.child_pages) - child_databases.extend(child_content.child_databases) - - for database_id in self.config.database_ids: - child_content = self.get_child_database_content(database_id=database_id) - child_pages.extend(child_content.child_pages) - child_databases.extend(child_content.child_databases) - - # Remove duplicates - child_pages = list(set(child_pages)) - child_pages = [c for c in child_pages if c not in self.config.page_ids] - - child_databases = list(set(child_databases)) - child_databases = [db for db in child_databases if db not in self.config.database_ids] - - if
child_pages: - self.config.get_logger().info( - "Adding the following child page ids: {}".format(", ".join(child_pages)), - ) - docs += [ - NotionPageIngestDoc( - standard_config=self.standard_config, - config=self.config, - page_id=page_id, - api_key=self.config.api_key, - ) - for page_id in child_pages - ] - - if child_databases: - self.config.get_logger().info( - "Adding the following child database ids: {}".format( - ", ".join(child_databases), - ), - ) - docs += [ - NotionDatabaseIngestDoc( - standard_config=self.standard_config, - config=self.config, - database_id=database_id, - api_key=self.config.api_key, - ) - for database_id in child_databases - ] - - return docs diff --git a/src/unstructured/ingest/connector/notion/helpers.py b/src/unstructured/ingest/connector/notion/helpers.py deleted file mode 100644 index 5bc1c84..0000000 --- a/src/unstructured/ingest/connector/notion/helpers.py +++ /dev/null @@ -1,525 +0,0 @@ -import enum -import logging -from dataclasses import dataclass, field -from typing import List, Optional, Tuple -from urllib.parse import urlparse -from uuid import UUID - -from htmlBuilder.attributes import Style, Type -from htmlBuilder.tags import ( - Body, - Div, - Head, - Html, - HtmlTag, - Ol, - Table, - Td, - Th, - Title, - Tr, - Ul, -) - -import unstructured.ingest.connector.notion.types.blocks as notion_blocks -from unstructured.ingest.connector.notion.client import Client -from unstructured.ingest.connector.notion.interfaces import BlockBase -from unstructured.ingest.connector.notion.types.block import Block -from unstructured.ingest.connector.notion.types.database import Database - - -@dataclass -class TextExtractionResponse: - text: Optional[str] = None - child_pages: List[str] = field(default_factory=list) - child_databases: List[str] = field(default_factory=list) - - -@dataclass -class HtmlExtractionResponse: - html: Optional[HtmlTag] = None - child_pages: List[str] = field(default_factory=list) - child_databases: List[str] = field(default_factory=list) - - -def extract_page_html( - client: Client, - page_id: str, - logger: logging.Logger, -) -> HtmlExtractionResponse: - page_id_uuid = UUID(page_id) - html_elements: List[Tuple[BlockBase, HtmlTag]] = [] - parent_block: Block = client.blocks.retrieve(block_id=page_id) # type: ignore - head = None - if isinstance(parent_block.block, notion_blocks.ChildPage): - head = Head([], Title([], parent_block.block.title)) - child_pages: List[str] = [] - child_databases: List[str] = [] - parents: List[Tuple[int, Block]] = [(0, parent_block)] - processed_block_ids = [] - while len(parents) > 0: - level, parent = parents.pop(0) - parent_html = parent.get_html() - if parent_html: - html_elements.append((parent.block, parent_html)) - logger.debug(f"processing block: {parent}") - if isinstance(parent.block, notion_blocks.ChildPage) and parent.id != str(page_id_uuid): - child_pages.append(parent.id) - continue - if isinstance(parent.block, notion_blocks.ChildDatabase): - child_databases.append(parent.id) - continue - if isinstance(parent.block, notion_blocks.Table): - table_response = build_table(client=client, table=parent) - html_elements.append((parent.block, table_response.table_html)) - child_pages.extend(table_response.child_pages) - child_databases.extend(table_response.child_databases) - continue - if isinstance(parent.block, notion_blocks.ColumnList): - column_html = build_columned_list(client=client, column_parent=parent) - html_elements.append((parent.block, column_html)) - continue - if isinstance(parent.block, 
notion_blocks.BulletedListItem): - bullet_list_resp = build_bulleted_list_children( - client=client, - bulleted_list_item_parent=parent, - ) - if bullet_list_children := bullet_list_resp.child_list: - html_elements.append((parent.block, bullet_list_children)) - continue - if isinstance(parent.block, notion_blocks.NumberedListItem): - numbered_list_resp = build_numbered_list_children( - client=client, - numbered_list_item_parent=parent, - ) - if numbered_list_children := numbered_list_resp.child_list: - html_elements.append((parent.block, numbered_list_children)) - continue - if parent.block.can_have_children() and parent.has_children: - children = [] - for children_block in client.blocks.children.iterate_list( # type: ignore - block_id=parent.id, - ): - children.extend(children_block) - if children: - logger.debug(f"Adding {len(children)} children from parent: {parent}") - for child in children: - if child.id not in processed_block_ids: - parents.append((level + 1, child)) - # Record the id (not the Block object) so the membership check above matches. - processed_block_ids.append(parent.id) - - # Join list items - joined_html_elements = [] - numbered_list_items = [] - bullet_list_items = [] - for block, html in html_elements: - if isinstance(block, notion_blocks.BulletedListItem): - bullet_list_items.append(html) - continue - if isinstance(block, notion_blocks.NumberedListItem): - numbered_list_items.append(html) - continue - if len(numbered_list_items) > 0: - joined_html_elements.append(Ol([], numbered_list_items)) - numbered_list_items = [] - if len(bullet_list_items) > 0: - joined_html_elements.append(Ul([], bullet_list_items)) - bullet_list_items = [] - joined_html_elements.append(html) - - body = Body([], joined_html_elements) - all_elements = [body] - if head: - all_elements = [head] + all_elements - full_html = Html([], all_elements) - return HtmlExtractionResponse( - full_html, - child_pages=child_pages, - child_databases=child_databases, - ) - - -def extract_database_html( - client: Client, - database_id: str, - logger: logging.Logger, -) -> HtmlExtractionResponse: - logger.debug(f"processing database id: {database_id}") - database: Database = client.databases.retrieve(database_id=database_id) # type: ignore - property_keys = list(database.properties.keys()) - property_keys = sorted(property_keys) - table_html_rows = [] - child_pages: List[str] = [] - child_databases: List[str] = [] - # Create header row - table_html_rows.append(Tr([], [Th([], k) for k in property_keys])) - - all_pages = [] - for page_chunk in client.databases.iterate_query(database_id=database_id): # type: ignore - all_pages.extend(page_chunk) - - logger.debug(f"Creating {len(all_pages)} rows") - for page in all_pages: - if is_database_url(page.url): - child_databases.append(page.id) - if is_page_url(page.url): - child_pages.append(page.id) - properties = page.properties - inner_html = [properties.get(k).get_html() for k in property_keys] # type: ignore - table_html_rows.append( - Tr( - [], - [Td([], cell) for cell in [html if html else Div([], []) for html in inner_html]], - ), - ) - - table_html = Table([], table_html_rows) - - return HtmlExtractionResponse( - html=table_html, - child_pages=child_pages, - child_databases=child_databases, - ) - - -@dataclass -class ChildExtractionResponse: - child_pages: List[str] = field(default_factory=list) - child_databases: List[str] = field(default_factory=list) - - -class QueueEntryType(enum.Enum): - DATABASE = "database" - PAGE = "page" - - -@dataclass -class QueueEntry: - type: QueueEntryType - id: UUID - - -def get_recursive_content_from_page( -
client: Client, - page_id: str, - logger: logging.Logger, -) -> ChildExtractionResponse: - return get_recursive_content( - client=client, - init_entry=QueueEntry(type=QueueEntryType.PAGE, id=UUID(page_id)), - logger=logger, - ) - - -def get_recursive_content_from_database( - client: Client, - database_id: str, - logger: logging.Logger, -) -> ChildExtractionResponse: - return get_recursive_content( - client=client, - init_entry=QueueEntry(type=QueueEntryType.DATABASE, id=UUID(database_id)), - logger=logger, - ) - - -def get_recursive_content( - client: Client, - init_entry: QueueEntry, - logger: logging.Logger, -) -> ChildExtractionResponse: - parents: List[QueueEntry] = [init_entry] - child_pages = [] - child_dbs = [] - processed = [] - while len(parents) > 0: - parent: QueueEntry = parents.pop() - # Store the id as a string; the child ids compared against this list are strings. - processed.append(str(parent.id)) - if parent.type == QueueEntryType.PAGE: - logger.debug(f"Getting child data from page: {parent.id}") - for children in client.blocks.children.iterate_list( # type: ignore - block_id=str(parent.id), - ): - child_pages_from_page = [ - c for c in children if isinstance(c.block, notion_blocks.ChildPage) - ] - if child_pages_from_page: - child_page_blocks: List[notion_blocks.ChildPage] = [ - p.block - for p in child_pages_from_page - if isinstance(p.block, notion_blocks.ChildPage) - ] - logger.debug( - "found child pages from parent page {}: {}".format( - parent.id, - ", ".join([block.title for block in child_page_blocks]), - ), - ) - new_pages = [p.id for p in child_pages_from_page if p.id not in processed] - child_pages.extend(new_pages) - parents.extend( - [QueueEntry(type=QueueEntryType.PAGE, id=UUID(i)) for i in new_pages], - ) - - child_dbs_from_page = [ - c for c in children if isinstance(c.block, notion_blocks.ChildDatabase) - ] - if child_dbs_from_page: - child_db_blocks: List[notion_blocks.ChildDatabase] = [ - c.block - for c in children - if isinstance(c.block, notion_blocks.ChildDatabase) - ] - logger.debug( - "found child database from parent page {}: {}".format( - parent.id, - ", ".join([block.title for block in child_db_blocks]), - ), - ) - new_dbs = [db.id for db in child_dbs_from_page if db.id not in processed] - child_dbs.extend(new_dbs) - parents.extend( - [QueueEntry(type=QueueEntryType.DATABASE, id=UUID(i)) for i in new_dbs], - ) - elif parent.type == QueueEntryType.DATABASE: - logger.debug(f"Getting child data from database: {parent.id}") - for page_entries in client.databases.iterate_query( # type: ignore - database_id=str(parent.id), - ): - child_pages_from_db = [p for p in page_entries if is_page_url(p.url)] - if child_pages_from_db: - logger.debug( - "found child pages from parent database {}: {}".format( - parent.id, - ", ".join([p.url for p in child_pages_from_db]), - ), - ) - new_pages = [p.id for p in child_pages_from_db if p.id not in processed] - child_pages.extend(new_pages) - parents.extend( - [QueueEntry(type=QueueEntryType.PAGE, id=UUID(i)) for i in new_pages], - ) - - child_dbs_from_db = [p for p in page_entries if is_database_url(p.url)] - if child_dbs_from_db: - logger.debug( - "found child database from parent database {}: {}".format( - parent.id, - ", ".join([db.url for db in child_dbs_from_db]), - ), - ) - new_dbs = [db.id for db in child_dbs_from_db if db.id not in processed] - child_dbs.extend(new_dbs) - parents.extend( - [QueueEntry(type=QueueEntryType.DATABASE, id=UUID(i)) for i in new_dbs], - ) - - return ChildExtractionResponse( - child_pages=child_pages, - child_databases=child_dbs, - ) - - -def is_valid_uuid(uuid_str: str)
-> bool: - try: - UUID(uuid_str) - return True - except Exception: - return False - - -def is_page_url(url: str): - parsed_url = urlparse(url) - path = parsed_url.path.split("/")[-1] - if parsed_url.netloc != "www.notion.so": - return False - if is_valid_uuid(path): - return False - strings = path.split("-") - if len(strings) > 0 and is_valid_uuid(strings[-1]): - return True - return False - - -def is_database_url(url: str): - parsed_url = urlparse(url) - path = parsed_url.path.split("/")[-1] - if parsed_url.netloc != "www.notion.so": - return False - return is_valid_uuid(path) - - -@dataclass -class BuildTableResponse: - table_html: HtmlTag - child_pages: List[str] = field(default_factory=list) - child_databases: List[str] = field(default_factory=list) - - -def build_table(client: Client, table: Block) -> BuildTableResponse: - if not isinstance(table.block, notion_blocks.Table): - raise ValueError(f"block type not table: {type(table.block)}") - rows: List[notion_blocks.TableRow] = [] - child_pages: List[str] = [] - child_databases: List[str] = [] - for row_chunk in client.blocks.children.iterate_list( # type: ignore - block_id=table.id, - ): - rows.extend( - [row.block for row in row_chunk if isinstance(row.block, notion_blocks.TableRow)], - ) - - # Extract child databases and pages - for row in rows: - for c in row.cells: - for rt in c.rich_texts: - if mention := rt.mention: - if mention.type == "page" and (page := mention.page): - child_pages.append(page.id) - if mention.type == "database" and (database := mention.database): - child_databases.append(database.id) - - header: Optional[notion_blocks.TableRow] = None - if table.block.has_column_header: - header = rows.pop(0) - table_html_rows = [] - if header: - header.is_header = True - table_html_rows.append(header.get_html()) - table_html_rows.extend([row.get_html() for row in rows]) - html_table = Table([], table_html_rows) - - return BuildTableResponse( - table_html=html_table, - child_pages=child_pages, - child_databases=child_databases, - ) - - -def build_columned_list(client: Client, column_parent: Block) -> HtmlTag: - if not isinstance(column_parent.block, notion_blocks.ColumnList): - raise ValueError(f"block type not column list: {type(column_parent.block)}") - columns: List[Block] = [] - for column_chunk in client.blocks.children.iterate_list( # type: ignore - block_id=column_parent.id, - ): - columns.extend(column_chunk) - num_columns = len(columns) - columns_content = [] - for column in columns: - for column_content_chunk in client.blocks.children.iterate_list( # type: ignore - block_id=column.id, - ): - columns_content.append( - Div( - [Style(f"width:{100/num_columns}%; float: left")], - [content.block.get_html() for content in column_content_chunk], - ), - ) - - return Div([], columns_content) - - -@dataclass -class BulletedListResponse: - html: HtmlTag - child_list: Optional[HtmlTag] = None - - -bulleted_list_styles = ["circle", "square", "disc"] - - -def build_bulleted_list_children( - client: Client, - bulleted_list_item_parent: Block, - list_style_ind: int = 0, -) -> BulletedListResponse: - if not isinstance(bulleted_list_item_parent.block, notion_blocks.BulletedListItem): - raise ValueError( - f"block type not bulleted list item: {type(bulleted_list_item_parent.block)}", - ) - html = bulleted_list_item_parent.get_html() - if html: - html.attributes = [Style("margin-left: 10px")] - if not bulleted_list_item_parent.has_children: - return BulletedListResponse( - html=html, - ) - children = [] - for child_block in 
client.blocks.children.iterate_list( # type: ignore - block_id=bulleted_list_item_parent.id, - ): - children.extend(child_block) - if not children: - return BulletedListResponse( - html=bulleted_list_item_parent.get_html(), - ) - child_html = [] - for child in children: - child_resp = build_bulleted_list_children( - client=client, - bulleted_list_item_parent=child, - list_style_ind=(list_style_ind + 1) % len(bulleted_list_styles), - ) - child_html.append(child_resp.html) - if child_children := child_resp.child_list: - child_html.append(child_children) - - return BulletedListResponse( - html=html, - child_list=Ul( - [Style(f"list-style-type: {bulleted_list_styles[list_style_ind]}")], - child_html, - ), - ) - - -@dataclass -class NumberedListResponse: - html: HtmlTag - child_list: Optional[HtmlTag] = None - - -numbered_list_types = ["a", "i", "1"] - - -def build_numbered_list_children( - client: Client, - numbered_list_item_parent: Block, - type_attr_ind=0, -) -> NumberedListResponse: - if not isinstance(numbered_list_item_parent.block, notion_blocks.NumberedListItem): - raise ValueError( - f"block type not numbered list item: {type(numbered_list_item_parent.block)}", - ) - html = numbered_list_item_parent.get_html() - if html: - html.attributes = [Style("margin-left: 10px")] - if not numbered_list_item_parent.has_children: - return NumberedListResponse( - html=html, - ) - children = [] - for child_block in client.blocks.children.iterate_list( # type: ignore - block_id=numbered_list_item_parent.id, - ): - children.extend(child_block) - if not children: - return NumberedListResponse( - html=numbered_list_item_parent.get_html(), - ) - child_html = [] - for child in children: - child_resp = build_numbered_list_children( - client=client, - numbered_list_item_parent=child, - type_attr_ind=(type_attr_ind + 1) % len(numbered_list_types), - ) - child_html.append(child_resp.html) - if child_children := child_resp.child_list: - child_html.append(child_children) - - return NumberedListResponse( - html=html, - child_list=Ol([Type(numbered_list_types[type_attr_ind])], child_html), - ) diff --git a/src/unstructured/ingest/connector/notion/interfaces.py b/src/unstructured/ingest/connector/notion/interfaces.py deleted file mode 100644 index bcfa788..0000000 --- a/src/unstructured/ingest/connector/notion/interfaces.py +++ /dev/null @@ -1,32 +0,0 @@ -from abc import ABC, abstractmethod -from typing import Optional - -from htmlBuilder.tags import HtmlTag - - -class FromJSONMixin(ABC): - @classmethod - @abstractmethod - def from_dict(cls, data: dict): - pass - - -class GetHTMLMixin(ABC): - @abstractmethod - def get_html(self) -> Optional[HtmlTag]: - pass - - -class BlockBase(FromJSONMixin, GetHTMLMixin): - @staticmethod - @abstractmethod - def can_have_children() -> bool: - pass - - -class DBPropertyBase(FromJSONMixin): - pass - - -class DBCellBase(FromJSONMixin, GetHTMLMixin): - pass diff --git a/src/unstructured/ingest/connector/notion/types/__init__.py b/src/unstructured/ingest/connector/notion/types/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/unstructured/ingest/connector/notion/types/block.py b/src/unstructured/ingest/connector/notion/types/block.py deleted file mode 100644 index b5d36e3..0000000 --- a/src/unstructured/ingest/connector/notion/types/block.py +++ /dev/null @@ -1,93 +0,0 @@ -# https://developers.notion.com/reference/page -from dataclasses import dataclass -from typing import Optional - -from htmlBuilder.tags import HtmlTag - -from 
unstructured.ingest.connector.notion.interfaces import ( - BlockBase, - FromJSONMixin, - GetHTMLMixin, -) -from unstructured.ingest.connector.notion.types import blocks -from unstructured.ingest.connector.notion.types.parent import Parent -from unstructured.ingest.connector.notion.types.user import PartialUser - -block_type_mapping = { - "bookmark": blocks.Bookmark, - "breadcrumb": blocks.Breadcrumb, - "bulleted_list_item": blocks.BulletedListItem, - "callout": blocks.Callout, - "child_database": blocks.ChildDatabase, - "child_page": blocks.ChildPage, - "code": blocks.Code, - "column": blocks.Column, - "column_list": blocks.ColumnList, - "divider": blocks.Divider, - "heading_1": blocks.Heading, - "heading_2": blocks.Heading, - "heading_3": blocks.Heading, - "embed": blocks.Embed, - "equation": blocks.Equation, - "file": blocks.File, - "image": blocks.Image, - "link_preview": blocks.LinkPreview, - "numbered_list_item": blocks.NumberedListItem, - "paragraph": blocks.Paragraph, - "pdf": blocks.PDF, - "quote": blocks.Quote, - "synced_block": blocks.SyncBlock, - "table": blocks.Table, - "table_of_contents": blocks.TableOfContents, - "table_row": blocks.TableRow, - "template": blocks.Template, - "to_do": blocks.ToDo, - "toggle": blocks.Toggle, - "unsupported": blocks.Unsupported, - "video": blocks.Video, -} - - -@dataclass -class Block(FromJSONMixin, GetHTMLMixin): - id: str - type: str - created_time: str - created_by: PartialUser - last_edited_time: str - last_edited_by: PartialUser - archived: bool - has_children: bool - parent: Parent - block: BlockBase - object: str = "block" - - def __repr__(self): - return f"{self.__class__.__name__}(id={self.id}, type={self.type})" - - @classmethod - def from_dict(cls, data: dict): - t = data["type"] - block_data = data.pop(t) - created_by = data.pop("created_by") - last_edited_by = data.pop("last_edited_by") - parent = data.pop("parent") - try: - block = cls( - created_by=PartialUser.from_dict(created_by), - last_edited_by=PartialUser.from_dict(last_edited_by), - parent=Parent.from_dict(parent), - block=block_type_mapping[t].from_dict(block_data), # type: ignore - **data, - ) - except KeyError as ke: - raise KeyError(f"failed to map to associated block type -> {t}: {block_data}") from ke - except TypeError as te: - raise TypeError(f"failed to map to associated block type -> {t}: {block_data}") from te - - return block - - def get_html(self) -> Optional[HtmlTag]: - if self.block: - return self.block.get_html() - return None diff --git a/src/unstructured/ingest/connector/notion/types/blocks/__init__.py b/src/unstructured/ingest/connector/notion/types/blocks/__init__.py deleted file mode 100644 index 14e0467..0000000 --- a/src/unstructured/ingest/connector/notion/types/blocks/__init__.py +++ /dev/null @@ -1,61 +0,0 @@ -from .bookmark import Bookmark -from .breadcrumb import Breadcrumb -from .bulleted_list_item import BulletedListItem -from .callout import Callout -from .child_database import ChildDatabase -from .child_page import ChildPage -from .code import Code -from .column_list import Column, ColumnList -from .divider import Divider -from .embed import Embed -from .equation import Equation -from .file import File -from .heading import Heading -from .image import Image -from .link_preview import LinkPreview -from .numbered_list import NumberedListItem -from .paragraph import Paragraph -from .pdf import PDF -from .quote import Quote -from .synced_block import DuplicateSyncedBlock, OriginalSyncedBlock, SyncBlock -from .table import Table, TableRow -from 
.table_of_contents import TableOfContents -from .template import Template -from .todo import ToDo -from .toggle import Toggle -from .unsupported import Unsupported -from .video import Video - -__all__ = [ - "Bookmark", - "Breadcrumb", - "BulletedListItem", - "Callout", - "ChildDatabase", - "ChildPage", - "Code", - "Column", - "ColumnList", - "Divider", - "Embed", - "Equation", - "File", - "Heading", - "Image", - "LinkPreview", - "NumberedListItem", - "Paragraph", - "PDF", - "Quote", - "SyncBlock", - "OriginalSyncedBlock", - "DuplicateSyncedBlock", - "Table", - "TableRow", - "TableOfContents", - "Template", - "ToDo", - "Toggle", - "Unsupported", - "Video", -] diff --git a/src/unstructured/ingest/connector/notion/types/blocks/bookmark.py b/src/unstructured/ingest/connector/notion/types/blocks/bookmark.py deleted file mode 100644 index 4680447..0000000 --- a/src/unstructured/ingest/connector/notion/types/blocks/bookmark.py +++ /dev/null @@ -1,40 +0,0 @@ -# https://developers.notion.com/reference/block#bookmark -from dataclasses import dataclass, field -from typing import List, Optional - -from htmlBuilder.attributes import Href -from htmlBuilder.tags import A, Br, Div, HtmlTag - -from unstructured.ingest.connector.notion.interfaces import BlockBase -from unstructured.ingest.connector.notion.types.rich_text import RichText - - -@dataclass -class Bookmark(BlockBase): - url: str - caption: List[RichText] = field(default_factory=list) - - @classmethod - def from_dict(cls, data: dict): - captions = data.pop("caption", []) - return cls( - url=data["url"], - caption=[RichText.from_dict(c) for c in captions], - ) - - def get_html(self) -> Optional[HtmlTag]: - texts = [] - if self.url: - texts.append(A([Href(self.url)], self.url)) - if self.caption: - texts.append(Div([], [rt.get_html() for rt in self.caption])) - if not texts: - return None - joined = [Br()] * (len(texts) * 2 - 1) - joined[0::2] = texts - - return Div([], joined) - - @staticmethod - def can_have_children() -> bool: - return False diff --git a/src/unstructured/ingest/connector/notion/types/blocks/breadcrumb.py b/src/unstructured/ingest/connector/notion/types/blocks/breadcrumb.py deleted file mode 100644 index d6b1626..0000000 --- a/src/unstructured/ingest/connector/notion/types/blocks/breadcrumb.py +++ /dev/null @@ -1,21 +0,0 @@ -# https://developers.notion.com/reference/block#breadcrumb -from dataclasses import dataclass -from typing import Optional - -from htmlBuilder.tags import HtmlTag - -from unstructured.ingest.connector.notion.interfaces import BlockBase - - -@dataclass -class Breadcrumb(BlockBase): - @staticmethod - def can_have_children() -> bool: - return False - - @classmethod - def from_dict(cls, data: dict): - return cls() - - def get_html(self) -> Optional[HtmlTag]: - pass diff --git a/src/unstructured/ingest/connector/notion/types/blocks/bulleted_list_item.py b/src/unstructured/ingest/connector/notion/types/blocks/bulleted_list_item.py deleted file mode 100644 index 5db911d..0000000 --- a/src/unstructured/ingest/connector/notion/types/blocks/bulleted_list_item.py +++ /dev/null @@ -1,31 +0,0 @@ -# https://developers.notion.com/reference/block#bulleted-list-item -from dataclasses import dataclass, field -from typing import List, Optional - -from htmlBuilder.tags import HtmlTag, Li - -from unstructured.ingest.connector.notion.interfaces import BlockBase -from unstructured.ingest.connector.notion.types.rich_text import RichText - - -@dataclass -class BulletedListItem(BlockBase): - color: str - children: List[dict] = 
field(default_factory=list) - rich_text: List[RichText] = field(default_factory=list) - - @staticmethod - def can_have_children() -> bool: - return True - - @classmethod - def from_dict(cls, data: dict): - rich_text = data.pop("rich_text", []) - return cls( - color=data["color"], - children=data.get("children", []), - rich_text=[RichText.from_dict(rt) for rt in rich_text], - ) - - def get_html(self) -> Optional[HtmlTag]: - return Li([], [rt.get_html() for rt in self.rich_text]) diff --git a/src/unstructured/ingest/connector/notion/types/blocks/callout.py b/src/unstructured/ingest/connector/notion/types/blocks/callout.py deleted file mode 100644 index 6ea2bb1..0000000 --- a/src/unstructured/ingest/connector/notion/types/blocks/callout.py +++ /dev/null @@ -1,94 +0,0 @@ -# https://developers.notion.com/reference/block#callout -from dataclasses import dataclass, field -from typing import List, Optional, Union - -from htmlBuilder.attributes import Href, Style -from htmlBuilder.tags import A, Div, HtmlTag, P - -from unstructured.ingest.connector.notion.interfaces import ( - BlockBase, - FromJSONMixin, - GetHTMLMixin, -) -from unstructured.ingest.connector.notion.types.rich_text import RichText - - -@dataclass -class EmojiIcon(FromJSONMixin, GetHTMLMixin): - emoji: str - type: str = "emoji" - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - def get_html(self) -> Optional[HtmlTag]: - return P([], self.emoji) - - -@dataclass -class ExternalIconContent(FromJSONMixin): - url: str - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - -@dataclass -class ExternalIcon(FromJSONMixin, GetHTMLMixin): - external: ExternalIconContent - type: str = "external" - - @classmethod - def from_dict(cls, data: dict): - return cls(external=ExternalIconContent.from_dict(data=data.pop("external")), **data) - - def get_html(self) -> Optional[HtmlTag]: - if self.external: - return A([Href(self.external.url)], [self.external.url]) - else: - return None - - -class Icon(FromJSONMixin): - @classmethod - def from_dict(cls, data: dict) -> Union[EmojiIcon, ExternalIcon]: - t = data.get("type") - if t == "emoji": - return EmojiIcon.from_dict(data) - elif t == "external": - return ExternalIcon.from_dict(data) - else: - raise ValueError(f"Unexpected icon type: {t} ({data})") - - -@dataclass -class Callout(BlockBase): - color: str - icon: Optional[Union[EmojiIcon, ExternalIcon]] = None - rich_text: List[RichText] = field(default_factory=list) - - @staticmethod - def can_have_children() -> bool: - return True - - @classmethod - def from_dict(cls, data: dict): - rich_text = data.pop("rich_text", []) - return cls( - color=data["color"], - icon=Icon.from_dict(data.pop("icon")), - rich_text=[RichText.from_dict(rt) for rt in rich_text], - ) - - def get_html(self) -> Optional[HtmlTag]: - elements = [] - if self.icon and self.icon.get_html(): - elements.append(self.icon.get_html()) - if self.rich_text: - elements.extend([rt.get_html() for rt in self.rich_text]) - attributes = [] - if self.color: - attributes.append(Style(f"color:{self.color}")) - return Div(attributes, elements) diff --git a/src/unstructured/ingest/connector/notion/types/blocks/child_database.py b/src/unstructured/ingest/connector/notion/types/blocks/child_database.py deleted file mode 100644 index 578b400..0000000 --- a/src/unstructured/ingest/connector/notion/types/blocks/child_database.py +++ /dev/null @@ -1,23 +0,0 @@ -# https://developers.notion.com/reference/block#child-database -from dataclasses import dataclass -from 
typing import Optional
-
-from htmlBuilder.tags import HtmlTag, P
-
-from unstructured.ingest.connector.notion.interfaces import BlockBase
-
-
-@dataclass
-class ChildDatabase(BlockBase):
-    title: str
-
-    @staticmethod
-    def can_have_children() -> bool:
-        return True
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(**data)
-
-    def get_html(self) -> Optional[HtmlTag]:
-        return P([], self.title)
diff --git a/src/unstructured/ingest/connector/notion/types/blocks/child_page.py b/src/unstructured/ingest/connector/notion/types/blocks/child_page.py
deleted file mode 100644
index 6ee6f90..0000000
--- a/src/unstructured/ingest/connector/notion/types/blocks/child_page.py
+++ /dev/null
@@ -1,23 +0,0 @@
-# https://developers.notion.com/reference/block#child-page
-from dataclasses import dataclass
-from typing import Optional
-
-from htmlBuilder.tags import HtmlTag, P
-
-from unstructured.ingest.connector.notion.interfaces import BlockBase, GetHTMLMixin
-
-
-@dataclass
-class ChildPage(BlockBase, GetHTMLMixin):
-    title: str
-
-    @staticmethod
-    def can_have_children() -> bool:
-        return True
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(**data)
-
-    def get_html(self) -> Optional[HtmlTag]:
-        return P([], self.title)
diff --git a/src/unstructured/ingest/connector/notion/types/blocks/code.py b/src/unstructured/ingest/connector/notion/types/blocks/code.py
deleted file mode 100644
index 3a6d80e..0000000
--- a/src/unstructured/ingest/connector/notion/types/blocks/code.py
+++ /dev/null
@@ -1,43 +0,0 @@
-# https://developers.notion.com/reference/block#code
-from dataclasses import dataclass, field
-from typing import List, Optional
-
-from htmlBuilder.tags import Br, Div, HtmlTag
-from htmlBuilder.tags import Code as HtmlCode
-
-from unstructured.ingest.connector.notion.interfaces import BlockBase
-from unstructured.ingest.connector.notion.types.rich_text import RichText
-
-
-@dataclass
-class Code(BlockBase):
-    language: str
-    rich_text: List[RichText] = field(default_factory=list)
-    caption: List[RichText] = field(default_factory=list)
-
-    @staticmethod
-    def can_have_children() -> bool:
-        return False
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        rich_text = data.pop("rich_text", [])
-        caption = data.pop("caption", [])
-        return cls(
-            language=data["language"],
-            rich_text=[RichText.from_dict(rt) for rt in rich_text],
-            caption=[RichText.from_dict(c) for c in caption],
-        )
-
-    def get_html(self) -> Optional[HtmlTag]:
-        texts = []
-        if self.rich_text:
-            texts.append(HtmlCode([], [rt.get_html() for rt in self.rich_text]))
-        if self.caption:
-            texts.append(Div([], [rt.get_html() for rt in self.caption]))
-        if not texts:
-            return None
-        joined = [Br()] * (len(texts) * 2 - 1)
-        joined[0::2] = texts
-
-        return Div([], joined)
diff --git a/src/unstructured/ingest/connector/notion/types/blocks/column_list.py b/src/unstructured/ingest/connector/notion/types/blocks/column_list.py
deleted file mode 100644
index d2df367..0000000
--- a/src/unstructured/ingest/connector/notion/types/blocks/column_list.py
+++ /dev/null
@@ -1,35 +0,0 @@
-# https://developers.notion.com/reference/block#column-list-and-column
-from dataclasses import dataclass
-from typing import Optional
-
-from htmlBuilder.tags import HtmlTag
-
-from unstructured.ingest.connector.notion.interfaces import BlockBase
-
-
-@dataclass
-class ColumnList(BlockBase):
-    @staticmethod
-    def can_have_children() -> bool:
-        return True
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls()
-
-    def get_html(self) -> Optional[HtmlTag]:
-        return None
-
-
-@dataclass
-class Column(BlockBase):
-    @staticmethod
-    def can_have_children() -> bool:
-        return True
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls()
-
-    def get_html(self) -> Optional[HtmlTag]:
-        return None
diff --git a/src/unstructured/ingest/connector/notion/types/blocks/divider.py b/src/unstructured/ingest/connector/notion/types/blocks/divider.py
deleted file mode 100644
index 33fc01e..0000000
--- a/src/unstructured/ingest/connector/notion/types/blocks/divider.py
+++ /dev/null
@@ -1,22 +0,0 @@
-# https://developers.notion.com/reference/block#divider
-from dataclasses import dataclass
-from typing import Optional
-
-from htmlBuilder.attributes import Style
-from htmlBuilder.tags import Hr, HtmlTag
-
-from unstructured.ingest.connector.notion.interfaces import BlockBase
-
-
-@dataclass
-class Divider(BlockBase):
-    @staticmethod
-    def can_have_children() -> bool:
-        return False
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls()
-
-    def get_html(self) -> Optional[HtmlTag]:
-        return Hr([Style("border-top: 3px solid #bbb")])
diff --git a/src/unstructured/ingest/connector/notion/types/blocks/embed.py b/src/unstructured/ingest/connector/notion/types/blocks/embed.py
deleted file mode 100644
index 561fe82..0000000
--- a/src/unstructured/ingest/connector/notion/types/blocks/embed.py
+++ /dev/null
@@ -1,36 +0,0 @@
-# https://developers.notion.com/reference/block#embed
-from dataclasses import dataclass, field
-from typing import List, Optional
-
-from htmlBuilder.attributes import Href
-from htmlBuilder.tags import A, Br, Div, HtmlTag
-
-from unstructured.ingest.connector.notion.interfaces import BlockBase
-from unstructured.ingest.connector.notion.types.rich_text import RichText
-
-
-@dataclass
-class Embed(BlockBase):
-    url: str
-    caption: List[RichText] = field(default_factory=list)
-
-    @staticmethod
-    def can_have_children() -> bool:
-        return False
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(caption=[RichText.from_dict(d) for d in data.pop("caption", [])], **data)
-
-    def get_html(self) -> Optional[HtmlTag]:
-        texts = []
-        if self.url:
-            texts.append(A([Href(self.url)], self.url))
-        if self.caption:
-            texts.append(Div([], [rt.get_html() for rt in self.caption]))
-        if not texts:
-            return None
-        joined = [Br()] * (len(texts) * 2 - 1)
-        joined[0::2] = texts
-
-        return Div([], joined)
diff --git a/src/unstructured/ingest/connector/notion/types/blocks/equation.py b/src/unstructured/ingest/connector/notion/types/blocks/equation.py
deleted file mode 100644
index ccab3d0..0000000
--- a/src/unstructured/ingest/connector/notion/types/blocks/equation.py
+++ /dev/null
@@ -1,23 +0,0 @@
-# https://developers.notion.com/reference/block#equation
-from dataclasses import dataclass
-from typing import Optional
-
-from htmlBuilder.tags import Div, HtmlTag
-
-from unstructured.ingest.connector.notion.interfaces import BlockBase
-
-
-@dataclass
-class Equation(BlockBase):
-    expression: str
-
-    @staticmethod
-    def can_have_children() -> bool:
-        return False
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(**data)
-
-    def get_html(self) -> Optional[HtmlTag]:
-        return Div([], self.expression)
diff --git a/src/unstructured/ingest/connector/notion/types/blocks/file.py b/src/unstructured/ingest/connector/notion/types/blocks/file.py
deleted file mode 100644
index ad7fe54..0000000
--- a/src/unstructured/ingest/connector/notion/types/blocks/file.py
+++ /dev/null
@@ -1,49 +0,0 @@
-# https://developers.notion.com/reference/block#file
-from dataclasses import dataclass, field
-from typing import List, Optional
-
-from htmlBuilder.attributes import Href
-from htmlBuilder.tags import A, Br, Div, HtmlTag
-
-from unstructured.ingest.connector.notion.interfaces import BlockBase
-from unstructured.ingest.connector.notion.types.file import External
-from unstructured.ingest.connector.notion.types.file import File as FileContent
-from unstructured.ingest.connector.notion.types.rich_text import RichText
-
-
-@dataclass
-class File(BlockBase):
-    type: str
-    external: Optional[External] = None
-    file: Optional[FileContent] = None
-    caption: List[RichText] = field(default_factory=list)
-
-    @staticmethod
-    def can_have_children() -> bool:
-        return False
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        caption = [RichText.from_dict(rt) for rt in data.pop("caption", [])]
-        t = data["type"]
-        file = cls(type=t, caption=caption)
-        if t == "external":
-            file.external = External.from_dict(data["external"])
-        elif t == "file":
-            file.file = FileContent.from_dict(data["file"])
-        return file
-
-    def get_html(self) -> Optional[HtmlTag]:
-        texts = []
-        if self.file:
-            texts.append(A([Href(self.file.url)], self.file.url))
-        if self.external:
-            texts.append(A([Href(self.external.url)], self.external.url))
-        if self.caption:
-            texts.append(Div([], [rt.get_html() for rt in self.caption]))
-        if not texts:
-            return None
-        joined = [Br()] * (len(texts) * 2 - 1)
-        joined[0::2] = texts
-
-        return Div([], joined)
diff --git a/src/unstructured/ingest/connector/notion/types/blocks/heading.py b/src/unstructured/ingest/connector/notion/types/blocks/heading.py
deleted file mode 100644
index 86983f5..0000000
--- a/src/unstructured/ingest/connector/notion/types/blocks/heading.py
+++ /dev/null
@@ -1,37 +0,0 @@
-# https://developers.notion.com/reference/block#headings
-from dataclasses import dataclass, field
-from typing import List, Optional
-
-from htmlBuilder.attributes import Style
-from htmlBuilder.tags import Div, HtmlTag
-
-from unstructured.ingest.connector.notion.interfaces import BlockBase
-from unstructured.ingest.connector.notion.types.rich_text import RichText
-
-
-@dataclass
-class Heading(BlockBase):
-    color: str
-    is_toggleable: bool
-    rich_text: List[RichText] = field(default_factory=list)
-
-    @staticmethod
-    def can_have_children() -> bool:
-        return False
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        rich_text = data.pop("rich_text", [])
-        heading = cls(**data)
-        heading.rich_text = [RichText.from_dict(rt) for rt in rich_text]
-        return heading
-
-    def get_html(self) -> Optional[HtmlTag]:
-        if not self.rich_text:
-            return None
-
-        texts = [rt.get_html() for rt in self.rich_text]
-        attributes = []
-        if self.color and self.color != "default":
-            attributes.append(Style(f"color: {self.color}"))
-        return Div(attributes, texts)
diff --git a/src/unstructured/ingest/connector/notion/types/blocks/image.py b/src/unstructured/ingest/connector/notion/types/blocks/image.py
deleted file mode 100644
index d9c5203..0000000
--- a/src/unstructured/ingest/connector/notion/types/blocks/image.py
+++ /dev/null
@@ -1,21 +0,0 @@
-# https://developers.notion.com/reference/block#image
-from typing import Optional
-
-from htmlBuilder.attributes import Src
-from htmlBuilder.tags import HtmlTag, Img
-
-from unstructured.ingest.connector.notion.interfaces import BlockBase
-from unstructured.ingest.connector.notion.types.file import FileObject
-
-
-class Image(BlockBase, FileObject):
-    @staticmethod
-    def can_have_children() -> bool:
-        return False
-
-    def get_html(self) -> Optional[HtmlTag]:
-        if self.external:
-            return Img([Src(self.external.url)], [])
-        if self.file:
-            return Img([Src(self.file.url)], [])
-        return None
diff --git a/src/unstructured/ingest/connector/notion/types/blocks/link_preview.py b/src/unstructured/ingest/connector/notion/types/blocks/link_preview.py
deleted file mode 100644
index 913df1f..0000000
--- a/src/unstructured/ingest/connector/notion/types/blocks/link_preview.py
+++ /dev/null
@@ -1,24 +0,0 @@
-# https://developers.notion.com/reference/block#link-preview
-from dataclasses import dataclass
-from typing import Optional
-
-from htmlBuilder.attributes import Href
-from htmlBuilder.tags import A, HtmlTag
-
-from unstructured.ingest.connector.notion.interfaces import BlockBase
-
-
-@dataclass
-class LinkPreview(BlockBase):
-    url: str
-
-    @staticmethod
-    def can_have_children() -> bool:
-        return False
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(**data)
-
-    def get_html(self) -> Optional[HtmlTag]:
-        return A([Href(self.url)], self.url)
diff --git a/src/unstructured/ingest/connector/notion/types/blocks/numbered_list.py b/src/unstructured/ingest/connector/notion/types/blocks/numbered_list.py
deleted file mode 100644
index b0051bc..0000000
--- a/src/unstructured/ingest/connector/notion/types/blocks/numbered_list.py
+++ /dev/null
@@ -1,29 +0,0 @@
-# https://developers.notion.com/reference/block#numbered-list-item
-from dataclasses import dataclass, field
-from typing import List, Optional
-
-from htmlBuilder.tags import HtmlTag, Li
-
-from unstructured.ingest.connector.notion.interfaces import BlockBase
-from unstructured.ingest.connector.notion.types.rich_text import RichText
-
-
-@dataclass
-class NumberedListItem(BlockBase):
-    color: str
-    children: List[dict] = field(default_factory=list)
-    rich_text: List[RichText] = field(default_factory=list)
-
-    @staticmethod
-    def can_have_children() -> bool:
-        return True
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        rich_text = data.pop("rich_text", [])
-        numbered_list = cls(**data)
-        numbered_list.rich_text = [RichText.from_dict(rt) for rt in rich_text]
-        return numbered_list
-
-    def get_html(self) -> Optional[HtmlTag]:
-        return Li([], [rt.get_html() for rt in self.rich_text])
diff --git a/src/unstructured/ingest/connector/notion/types/blocks/paragraph.py b/src/unstructured/ingest/connector/notion/types/blocks/paragraph.py
deleted file mode 100644
index bc31e4c..0000000
--- a/src/unstructured/ingest/connector/notion/types/blocks/paragraph.py
+++ /dev/null
@@ -1,31 +0,0 @@
-# https://developers.notion.com/reference/block#paragraph
-from dataclasses import dataclass, field
-from typing import List, Optional
-
-from htmlBuilder.tags import Br, Div, HtmlTag
-
-from unstructured.ingest.connector.notion.interfaces import BlockBase
-from unstructured.ingest.connector.notion.types.rich_text import RichText
-
-
-@dataclass
-class Paragraph(BlockBase):
-    color: str
-    children: List[dict] = field(default_factory=list)
-    rich_text: List[RichText] = field(default_factory=list)
-
-    @staticmethod
-    def can_have_children() -> bool:
-        return True
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        rich_text = data.pop("rich_text", [])
-        paragraph = cls(**data)
-        paragraph.rich_text = [RichText.from_dict(rt) for rt in rich_text]
-        return paragraph
-
-    def get_html(self) -> Optional[HtmlTag]:
-        if not self.rich_text:
-            return Br()
-        return Div([], [rt.get_html() for rt in self.rich_text])
diff --git a/src/unstructured/ingest/connector/notion/types/blocks/pdf.py b/src/unstructured/ingest/connector/notion/types/blocks/pdf.py
deleted file mode 100644
index 61ef3a8..0000000
--- a/src/unstructured/ingest/connector/notion/types/blocks/pdf.py
+++ /dev/null
@@ -1,49 +0,0 @@
-# https://developers.notion.com/reference/block#pdf
-from dataclasses import dataclass, field
-from typing import List, Optional
-
-from htmlBuilder.attributes import Href
-from htmlBuilder.tags import A, Br, Div, HtmlTag
-
-from unstructured.ingest.connector.notion.interfaces import BlockBase
-from unstructured.ingest.connector.notion.types.file import External, File
-from unstructured.ingest.connector.notion.types.rich_text import RichText
-
-
-@dataclass
-class PDF(BlockBase):
-    type: str
-    caption: List[RichText] = field(default_factory=list)
-    external: Optional[External] = None
-    file: Optional[File] = None
-
-    @staticmethod
-    def can_have_children() -> bool:
-        return False
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        caption = data.pop("caption", [])
-        t = data["type"]
-        paragraph = cls(type=t)
-        paragraph.caption = [RichText.from_dict(c) for c in caption]
-        if t == "external":
-            paragraph.external = External.from_dict(data["external"])
-        elif t == "file":
-            paragraph.file = File.from_dict(data["file"])
-        return paragraph
-
-    def get_html(self) -> Optional[HtmlTag]:
-        texts = []
-        if self.external:
-            texts.append(A([Href(self.external.url)], self.external.url))
-        if self.file:
-            texts.append(A([Href(self.file.url)], self.file.url))
-        if self.caption:
-            texts.append(Div([], [rt.get_html() for rt in self.caption]))
-        if not texts:
-            return None
-        joined = [Br()] * (len(texts) * 2 - 1)
-        joined[0::2] = texts
-
-        return Div([], joined)
diff --git a/src/unstructured/ingest/connector/notion/types/blocks/quote.py b/src/unstructured/ingest/connector/notion/types/blocks/quote.py
deleted file mode 100644
index 1469f1d..0000000
--- a/src/unstructured/ingest/connector/notion/types/blocks/quote.py
+++ /dev/null
@@ -1,37 +0,0 @@
-# https://developers.notion.com/reference/block#quote
-from dataclasses import dataclass, field
-from typing import List, Optional
-
-from htmlBuilder.attributes import Style
-from htmlBuilder.tags import Div, HtmlTag
-
-from unstructured.ingest.connector.notion.interfaces import BlockBase
-from unstructured.ingest.connector.notion.types.rich_text import RichText
-
-
-@dataclass
-class Quote(BlockBase):
-    color: str
-    children: List[dict] = field(default_factory=list)
-    rich_text: List[RichText] = field(default_factory=list)
-
-    @staticmethod
-    def can_have_children() -> bool:
-        return True
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        rich_text = data.pop("rich_text", [])
-        quote = cls(**data)
-        quote.rich_text = [RichText.from_dict(rt) for rt in rich_text]
-        return quote
-
-    def get_html(self) -> Optional[HtmlTag]:
-        if not self.rich_text:
-            return None
-
-        texts = [rt.get_html() for rt in self.rich_text]
-        attributes = []
-        if self.color and self.color != "default":
-            attributes.append(Style(f"color: {self.color}"))
-        return Div(attributes, texts)
diff --git a/src/unstructured/ingest/connector/notion/types/blocks/synced_block.py b/src/unstructured/ingest/connector/notion/types/blocks/synced_block.py
deleted file mode 100644
index b4cd2da..0000000
--- a/src/unstructured/ingest/connector/notion/types/blocks/synced_block.py
+++ /dev/null
@@ -1,57 +0,0 @@
-# https://developers.notion.com/reference/block#synced-block
-from dataclasses import dataclass, field
-from typing import List, Optional
-
-from htmlBuilder.tags import HtmlTag
-
-from unstructured.ingest.connector.notion.interfaces import BlockBase
-
-
-@dataclass
-class OriginalSyncedBlock(BlockBase):
-    synced_from: Optional[str] = None
-    children: List[dict] = field(default_factory=list)
-
-    @staticmethod
-    def can_have_children() -> bool:
-        return True
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(children=data["children"])
-
-    def get_html(self) -> Optional[HtmlTag]:
-        return None
-
-
-@dataclass
-class DuplicateSyncedBlock(BlockBase):
-    type: str
-    block_id: str
-
-    @staticmethod
-    def can_have_children() -> bool:
-        return True
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(**data)
-
-    def get_html(self) -> Optional[HtmlTag]:
-        return None
-
-
-class SyncBlock(BlockBase):
-    @staticmethod
-    def can_have_children() -> bool:
-        return True
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        if "synced_from" in data:
-            return OriginalSyncedBlock.from_dict(data)
-        else:
-            return DuplicateSyncedBlock.from_dict(data)
-
-    def get_html(self) -> Optional[HtmlTag]:
-        return None
diff --git a/src/unstructured/ingest/connector/notion/types/blocks/table.py b/src/unstructured/ingest/connector/notion/types/blocks/table.py
deleted file mode 100644
index 7858275..0000000
--- a/src/unstructured/ingest/connector/notion/types/blocks/table.py
+++ /dev/null
@@ -1,63 +0,0 @@
-# https://developers.notion.com/reference/block#table
-from dataclasses import dataclass, field
-from typing import List, Optional
-
-from htmlBuilder.tags import HtmlTag, Td, Th, Tr
-
-from unstructured.ingest.connector.notion.interfaces import (
-    BlockBase,
-    FromJSONMixin,
-)
-from unstructured.ingest.connector.notion.types.rich_text import RichText
-
-
-@dataclass
-class Table(BlockBase):
-    table_width: int
-    has_column_header: bool
-    has_row_header: bool
-
-    @staticmethod
-    def can_have_children() -> bool:
-        return True
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(**data)
-
-    def get_html(self) -> Optional[HtmlTag]:
-        return None
-
-
-@dataclass
-class TableCell(FromJSONMixin):
-    rich_texts: List[RichText]
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(rich_texts=[RichText.from_dict(rt) for rt in data.pop("rich_texts", [])])
-
-    def get_html(self, is_header: bool) -> Optional[HtmlTag]:
-        if is_header:
-            return Th([], [rt.get_html() for rt in self.rich_texts])
-        else:
-            return Td([], [rt.get_html() for rt in self.rich_texts])
-
-
-# https://developers.notion.com/reference/block#table-rows
-@dataclass
-class TableRow(BlockBase):
-    is_header: bool = False
-    cells: List[TableCell] = field(default_factory=list)
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        cells = data.get("cells", [])
-        return cls(cells=[TableCell.from_dict({"rich_texts": c}) for c in cells])
-
-    @staticmethod
-    def can_have_children() -> bool:
-        return False
-
-    def get_html(self) -> Optional[HtmlTag]:
-        return Tr([], [cell.get_html(is_header=self.is_header) for cell in self.cells])
diff --git a/src/unstructured/ingest/connector/notion/types/blocks/table_of_contents.py b/src/unstructured/ingest/connector/notion/types/blocks/table_of_contents.py
deleted file mode 100644
index f753f60..0000000
--- a/src/unstructured/ingest/connector/notion/types/blocks/table_of_contents.py
+++ /dev/null
@@ -1,23 +0,0 @@
-# https://developers.notion.com/reference/block#table-of-contents
-from dataclasses import dataclass
-from typing import Optional
-
-from htmlBuilder.tags import HtmlTag
-
-from unstructured.ingest.connector.notion.interfaces import BlockBase
-
-
-@dataclass
-class TableOfContents(BlockBase):
-    color: str
-
-    @staticmethod
-    def can_have_children() -> bool:
-        return False
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(**data)
-
-    def get_html(self) -> Optional[HtmlTag]:
-        return None
diff --git a/src/unstructured/ingest/connector/notion/types/blocks/template.py b/src/unstructured/ingest/connector/notion/types/blocks/template.py
deleted file mode 100644
index 4505687..0000000
--- a/src/unstructured/ingest/connector/notion/types/blocks/template.py
+++ /dev/null
@@ -1,30 +0,0 @@
-# https://developers.notion.com/reference/block#template
-from dataclasses import dataclass, field
-from typing import List, Optional
-
-from htmlBuilder.tags import Div, HtmlTag
-
-from unstructured.ingest.connector.notion.interfaces import BlockBase
-from unstructured.ingest.connector.notion.types.rich_text import RichText
-
-
-@dataclass
-class Template(BlockBase):
-    children: List[dict] = field(default_factory=list)
-    rich_text: List[RichText] = field(default_factory=list)
-
-    @staticmethod
-    def can_have_children() -> bool:
-        return True
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        rich_text = data.pop("rich_text", [])
-        template = cls(**data)
-        template.rich_text = [RichText.from_dict(rt) for rt in rich_text]
-        return template
-
-    def get_html(self) -> Optional[HtmlTag]:
-        if not self.rich_text:
-            return None
-        return Div([], [rt.get_html() for rt in self.rich_text])
diff --git a/src/unstructured/ingest/connector/notion/types/blocks/todo.py b/src/unstructured/ingest/connector/notion/types/blocks/todo.py
deleted file mode 100644
index 3e03b2c..0000000
--- a/src/unstructured/ingest/connector/notion/types/blocks/todo.py
+++ /dev/null
@@ -1,42 +0,0 @@
-# https://developers.notion.com/reference/block#to-do
-from dataclasses import dataclass, field
-from typing import List, Optional
-
-from htmlBuilder.attributes import Checked, Style, Type
-from htmlBuilder.tags import Div, HtmlTag, Input
-
-from unstructured.ingest.connector.notion.interfaces import BlockBase
-from unstructured.ingest.connector.notion.types.rich_text import RichText
-
-
-@dataclass
-class ToDo(BlockBase):
-    color: str
-    checked: bool = False
-    rich_text: List[RichText] = field(default_factory=list)
-
-    @staticmethod
-    def can_have_children() -> bool:
-        return True
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        rich_text = data.pop("rich_text", [])
-        todo = cls(**data)
-        todo.rich_text = [RichText.from_dict(rt) for rt in rich_text]
-        return todo
-
-    def get_html(self) -> Optional[HtmlTag]:
-        if not self.rich_text:
-            return None
-
-        elements = []
-        check_input_attributes = [Type("checkbox")]
-        if self.checked:
-            check_input_attributes.append(Checked(""))
-        elements.append(Input(check_input_attributes))
-        elements.extend([rt.get_html() for rt in self.rich_text])
-        attributes = []
-        if self.color and self.color != "default":
-            attributes.append(Style(f"color: {self.color}"))
-        return Div(attributes, elements)
diff --git a/src/unstructured/ingest/connector/notion/types/blocks/toggle.py b/src/unstructured/ingest/connector/notion/types/blocks/toggle.py
deleted file mode 100644
index 8619eb7..0000000
--- a/src/unstructured/ingest/connector/notion/types/blocks/toggle.py
+++ /dev/null
@@ -1,37 +0,0 @@
-# https://developers.notion.com/reference/block#toggle-blocks
-from dataclasses import dataclass, field
-from typing import List, Optional
-
-from htmlBuilder.attributes import Style
-from htmlBuilder.tags import Div, HtmlTag
-
-from unstructured.ingest.connector.notion.interfaces import BlockBase
-from unstructured.ingest.connector.notion.types.rich_text import RichText
-
-
-@dataclass
-class Toggle(BlockBase):
-    color: str
-    children: List[dict] = field(default_factory=list)
-    rich_text: List[RichText] = field(default_factory=list)
-
-    @staticmethod
-    def can_have_children() -> bool:
-        return True
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        rich_text = data.pop("rich_text", [])
-        toggle = cls(**data)
-        toggle.rich_text = [RichText.from_dict(rt) for rt in rich_text]
-        return toggle
-
-    def get_html(self) -> Optional[HtmlTag]:
-        if not self.rich_text:
-            return None
-
-        texts = [rt.get_html() for rt in self.rich_text]
-        attributes = []
-        if self.color and self.color != "default":
-            attributes.append(Style(f"color: {self.color}"))
-        return Div(attributes, texts)
diff --git a/src/unstructured/ingest/connector/notion/types/blocks/unsupported.py b/src/unstructured/ingest/connector/notion/types/blocks/unsupported.py
deleted file mode 100644
index 6e28b8c..0000000
--- a/src/unstructured/ingest/connector/notion/types/blocks/unsupported.py
+++ /dev/null
@@ -1,20 +0,0 @@
-from dataclasses import dataclass
-from typing import Optional
-
-from htmlBuilder.tags import HtmlTag
-
-from unstructured.ingest.connector.notion.interfaces import BlockBase
-
-
-@dataclass
-class Unsupported(BlockBase):
-    @staticmethod
-    def can_have_children() -> bool:
-        return False
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls()
-
-    def get_html(self) -> Optional[HtmlTag]:
-        return None
diff --git a/src/unstructured/ingest/connector/notion/types/blocks/video.py b/src/unstructured/ingest/connector/notion/types/blocks/video.py
deleted file mode 100644
index 2523adf..0000000
--- a/src/unstructured/ingest/connector/notion/types/blocks/video.py
+++ /dev/null
@@ -1,22 +0,0 @@
-# https://developers.notion.com/reference/block#image
-from typing import Optional
-
-from htmlBuilder.attributes import Src
-from htmlBuilder.tags import HtmlTag, Source
-from htmlBuilder.tags import Video as VideoHtml
-
-from unstructured.ingest.connector.notion.interfaces import BlockBase
-from unstructured.ingest.connector.notion.types.file import FileObject
-
-
-class Video(BlockBase, FileObject):
-    @staticmethod
-    def can_have_children() -> bool:
-        return False
-
-    def get_html(self) -> Optional[HtmlTag]:
-        if self.external:
-            return VideoHtml([], [Source([Src(self.external.url)], [self.external.url])])
-        if self.file:
-            return VideoHtml([], [Source([Src(self.file.url)], [self.file.url])])
-        return None
diff --git a/src/unstructured/ingest/connector/notion/types/database.py b/src/unstructured/ingest/connector/notion/types/database.py
deleted file mode 100644
index b2372a7..0000000
--- a/src/unstructured/ingest/connector/notion/types/database.py
+++ /dev/null
@@ -1,71 +0,0 @@
-# https://developers.notion.com/reference/database
-from dataclasses import dataclass, field
-from typing import Dict, List, Optional
-
-from htmlBuilder.tags import Div, HtmlTag, Span
-
-from unstructured.ingest.connector.notion.interfaces import (
-    DBPropertyBase,
-    FromJSONMixin,
-    GetHTMLMixin,
-)
-from unstructured.ingest.connector.notion.types.database_properties import (
-    map_properties,
-)
-from unstructured.ingest.connector.notion.types.file import FileObject
-from unstructured.ingest.connector.notion.types.parent import Parent
-from unstructured.ingest.connector.notion.types.rich_text import RichText
-from unstructured.ingest.connector.notion.types.user import PartialUser
-
-
-@dataclass
-class Database(FromJSONMixin, GetHTMLMixin):
-    id: str
-    created_time: str
-    created_by: PartialUser
-    last_edited_time: str
-    last_edited_by: PartialUser
-    archived: bool
-    parent: Parent
-    url: str
-    is_inline: bool
-    public_url: str
-    properties: Dict[str, DBPropertyBase] = field(default_factory=dict)
-    title: List[RichText] = field(default_factory=list)
-    description: List[RichText] = field(default_factory=list)
-    icon: Optional[FileObject] = None
-    cover: Optional[FileObject] = None
-    object: str = "database"
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        created_by = data.pop("created_by")
-        last_edited_by = data.pop("last_edited_by")
-        icon = data.pop("icon")
-        cover = data.pop("cover")
-        parent = data.pop("parent")
-        title = data.pop("title")
-        description = data.pop("description")
-        page = cls(
-            properties=map_properties(data.pop("properties", {})),
-            created_by=PartialUser.from_dict(created_by),
-            last_edited_by=PartialUser.from_dict(last_edited_by),
-            icon=FileObject.from_dict(icon) if icon else None,
-            cover=FileObject.from_dict(cover) if cover else None,
-            parent=Parent.from_dict(parent),
-            title=[RichText.from_dict(data=r) for r in title],
-            description=[RichText.from_dict(data=r) for r in description],
-            **data,
-        )
-
-        return page
-
-    def get_html(self) -> Optional[HtmlTag]:
-        spans = []
-        if title := self.title:
-            spans.append(Span([], [rt.get_html() for rt in title]))
-        if description := self.description:
-            spans.append(Span([], [rt.get_html() for rt in description]))
-        if spans:
-            return Div([], spans)
-        return None
diff --git a/src/unstructured/ingest/connector/notion/types/database_properties/__init__.py b/src/unstructured/ingest/connector/notion/types/database_properties/__init__.py
deleted file mode 100644
index 1001113..0000000
--- a/src/unstructured/ingest/connector/notion/types/database_properties/__init__.py
+++ /dev/null
@@ -1,106 +0,0 @@
-from typing import Dict
-
-from unstructured.ingest.connector.notion.interfaces import DBCellBase, DBPropertyBase
-
-from .checkbox import Checkbox, CheckboxCell
-from .created_by import CreatedBy, CreatedByCell
-from .created_time import CreatedTime, CreatedTimeCell
-from .date import Date, DateCell
-from .email import Email, EmailCell
-from .files import Files, FilesCell
-from .formula import Formula, FormulaCell
-from .last_edited_by import LastEditedBy, LastEditedByCell
-from .last_edited_time import LastEditedTime, LastEditedTimeCell
-from .multiselect import MultiSelect, MultiSelectCell
-from .number import Number, NumberCell
-from .people import People, PeopleCell
-from .phone_number import PhoneNumber, PhoneNumberCell
-from .relation import Relation, RelationCell
-from .rich_text import RichText, RichTextCell
-from .rollup import Rollup, RollupCell
-from .select import Select, SelectCell
-from .status import Status, StatusCell
-from .title import Title, TitleCell
-from .unique_id import UniqueID, UniqueIDCell
-from .url import URL, URLCell
-from .verification import Verification, VerificationCell
-
-db_prop_type_mapping = {
-    "checkbox": Checkbox,
-    "created_by": CreatedBy,
-    "created_time": CreatedTime,
-    "date": Date,
-    "email": Email,
-    "files": Files,
-    "formula": Formula,
-    "last_edited_by": LastEditedBy,
-    "last_edited_time": LastEditedTime,
-    "multi_select": MultiSelect,
-    "number": Number,
-    "people": People,
-    "phone_number": PhoneNumber,
-    "relation": Relation,
-    "rich_text": RichText,
-    "rollup": Rollup,
-    "select": Select,
-    "status": Status,
-    "title": Title,
-    "unique_id": UniqueID,
-    "url": URL,
-    "verification": Verification,
-}
-
-
-def map_properties(props: Dict[str, dict]) -> Dict[str, DBPropertyBase]:
-    mapped_dict = {}
-    for k, v in props.items():
-        try:
-            mapped_dict[k] = db_prop_type_mapping[v["type"]].from_dict(v)  # type: ignore
-        except KeyError as ke:
-            raise KeyError(f"failed to map to associated database property -> {k}: {v}") from ke
-
-    return mapped_dict
-
-
-db_cell_type_mapping = {
-    "checkbox": CheckboxCell,
-    "created_by": CreatedByCell,
-    "created_time": CreatedTimeCell,
-    "date": DateCell,
-    "email": EmailCell,
-    "files": FilesCell,
-    "formula": FormulaCell,
-    "last_edited_by": LastEditedByCell,
-    "last_edited_time": LastEditedTimeCell,
-    "multi_select": MultiSelectCell,
-    "number": NumberCell,
-    "people": PeopleCell,
-    "phone_number": PhoneNumberCell,
-    "relation": RelationCell,
-    "rich_text": RichTextCell,
-    "rollup": RollupCell,
-    "select": SelectCell,
-    "status": StatusCell,
-    "title": TitleCell,
-    "unique_id": UniqueIDCell,
-    "url": URLCell,
-    "verification": VerificationCell,
-}
-
-
-def map_cells(props: Dict[str, dict]) -> Dict[str, DBCellBase]:
-    mapped_dict = {}
-    for k, v in props.items():
-        try:
-            t = v["type"]
-            mapped_dict[k] = db_cell_type_mapping[t].from_dict(v)  # type: ignore
-        except KeyError as ke:
-            raise KeyError(f"failed to map to associated database property -> {k}: {v}") from ke
-
-    return mapped_dict
-
-
-__all__ = [
-    "map_properties",
-    "map_cells",
-]
diff --git a/src/unstructured/ingest/connector/notion/types/database_properties/checkbox.py b/src/unstructured/ingest/connector/notion/types/database_properties/checkbox.py
deleted file mode 100644
index b60d187..0000000
--- a/src/unstructured/ingest/connector/notion/types/database_properties/checkbox.py
+++ /dev/null
@@ -1,38 +0,0 @@
-# https://developers.notion.com/reference/property-object#checkbox
-from dataclasses import dataclass, field
-from typing import Optional
-
-from htmlBuilder.attributes import Checked, Type
-from htmlBuilder.tags import Div, HtmlTag, Input
-
-from unstructured.ingest.connector.notion.interfaces import DBCellBase, DBPropertyBase
-
-
-@dataclass
-class Checkbox(DBPropertyBase):
-    id: str
-    name: str
-    type: str = "checkbox"
-    checkbox: dict = field(default_factory=dict)
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(**data)
-
-
-@dataclass
-class CheckboxCell(DBCellBase):
-    id: str
-    checkbox: bool
-    name: Optional[str] = None
-    type: str = "checkbox"
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(**data)
-
-    def get_html(self) -> Optional[HtmlTag]:
-        check_input_attributes = [Type("checkbox")]
-        if self.checkbox:
-            check_input_attributes.append(Checked(""))
-        return Div([], Input(check_input_attributes))
diff --git a/src/unstructured/ingest/connector/notion/types/database_properties/created_by.py b/src/unstructured/ingest/connector/notion/types/database_properties/created_by.py
deleted file mode 100644
index 034b0c1..0000000
--- a/src/unstructured/ingest/connector/notion/types/database_properties/created_by.py
+++ /dev/null
@@ -1,35 +0,0 @@
-# https://developers.notion.com/reference/property-object#created-by
-from dataclasses import dataclass, field
-from typing import Optional
-
-from htmlBuilder.tags import HtmlTag
-
-from unstructured.ingest.connector.notion.interfaces import DBCellBase, DBPropertyBase
-from unstructured.ingest.connector.notion.types.user import People
-
-
-@dataclass
-class CreatedBy(DBPropertyBase):
-    id: str
-    name: str
-    type: str = "created_by"
-    created_by: dict = field(default_factory=dict)
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(**data)
-
-
-@dataclass
-class CreatedByCell(DBCellBase):
-    id: str
-    created_by: People
-    type: str = "created_by"
-    name: Optional[str] = None
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(created_by=People.from_dict(data.pop("created_by")), **data)
-
-    def get_html(self) -> Optional[HtmlTag]:
-        return self.created_by.get_html()
diff --git a/src/unstructured/ingest/connector/notion/types/database_properties/created_time.py b/src/unstructured/ingest/connector/notion/types/database_properties/created_time.py
deleted file mode 100644
index 86c1173..0000000
--- a/src/unstructured/ingest/connector/notion/types/database_properties/created_time.py
+++ /dev/null
@@ -1,34 +0,0 @@
-# https://developers.notion.com/reference/property-object#created-time
-from dataclasses import dataclass, field
-from typing import Optional
-
-from htmlBuilder.tags import Div, HtmlTag
-
-from unstructured.ingest.connector.notion.interfaces import DBCellBase, DBPropertyBase
-
-
-@dataclass
-class CreatedTime(DBPropertyBase):
-    id: str
-    name: str
-    type: str = "created_time"
-    created_time: dict = field(default_factory=dict)
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(**data)
-
-
-@dataclass
-class CreatedTimeCell(DBCellBase):
-    id: str
-    created_time: str
-    type: str = "created_time"
-    name: Optional[str] = None
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(**data)
-
-    def get_html(self) -> Optional[HtmlTag]:
-        return Div([], self.created_time)
diff --git a/src/unstructured/ingest/connector/notion/types/database_properties/date.py b/src/unstructured/ingest/connector/notion/types/database_properties/date.py
deleted file mode 100644
index 779ef60..0000000
--- a/src/unstructured/ingest/connector/notion/types/database_properties/date.py
+++ /dev/null
@@ -1,41 +0,0 @@
-# https://developers.notion.com/reference/property-object#date
-from dataclasses import dataclass, field
-from typing import Optional
-
-from htmlBuilder.tags import HtmlTag
-
-from unstructured.ingest.connector.notion.interfaces import DBCellBase, DBPropertyBase
-from unstructured.ingest.connector.notion.types.date import Date as DateType
-
-
-@dataclass
-class Date(DBPropertyBase):
-    id: str
-    name: str
-    type: str = "date"
-    date: dict = field(default_factory=dict)
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(**data)
-
-
-@dataclass
-class DateCell(DBCellBase):
-    id: str
-    date: Optional[DateType] = None
-    name: Optional[str] = None
-    type: str = "date"
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        date = None
-        date_data = data.pop("date")
-        if date_data:
-            date = DateType.from_dict(date_data)
-        return cls(date=date, **data)
-
-    def get_html(self) -> Optional[HtmlTag]:
-        if date := self.date:
-            return date.get_html()
-        return None
diff --git a/src/unstructured/ingest/connector/notion/types/database_properties/email.py b/src/unstructured/ingest/connector/notion/types/database_properties/email.py
deleted file mode 100644
index 1303770..0000000
--- a/src/unstructured/ingest/connector/notion/types/database_properties/email.py
+++ /dev/null
@@ -1,36 +0,0 @@
-# https://developers.notion.com/reference/property-object#email
-from dataclasses import dataclass, field
-from typing import Optional
-
-from htmlBuilder.tags import Div, HtmlTag
-
-from unstructured.ingest.connector.notion.interfaces import DBCellBase, DBPropertyBase
-
-
-@dataclass
-class Email(DBPropertyBase):
-    id: str
-    name: str
-    type: str = "email"
-    email: dict = field(default_factory=dict)
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(**data)
-
-
-@dataclass
-class EmailCell(DBCellBase):
-    id: str
-    email: str
-    name: Optional[str] = None
-    type: str = "email"
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(**data)
-
-    def get_html(self) -> Optional[HtmlTag]:
-        if email := self.email:
-            return Div([], email)
-        return None
diff --git a/src/unstructured/ingest/connector/notion/types/database_properties/files.py b/src/unstructured/ingest/connector/notion/types/database_properties/files.py
deleted file mode 100644
index 680ee15..0000000
--- a/src/unstructured/ingest/connector/notion/types/database_properties/files.py
+++ /dev/null
@@ -1,37 +0,0 @@
-# https://developers.notion.com/reference/property-object#files
-from dataclasses import dataclass, field
-from typing import List, Optional
-
-from htmlBuilder.tags import Div, HtmlTag
-
-from unstructured.ingest.connector.notion.interfaces import DBCellBase, DBPropertyBase
-from unstructured.ingest.connector.notion.types.file import FileObject
-
-
-@dataclass
-class Files(DBPropertyBase):
-    id: str
-    name: str
-    type: str = "files"
-    files: dict = field(default_factory=dict)
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(**data)
-
-
-@dataclass
-class FilesCell(DBCellBase):
-    id: str
-    files: List[FileObject]
-    type: str = "files"
-    name: Optional[str] = None
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(files=[FileObject.from_dict(f) for f in data.pop("files", [])], **data)
-
-    def get_html(self) -> Optional[HtmlTag]:
-        if not self.files:
-            return None
-        return Div([], [f.get_html() for f in self.files])
diff --git a/src/unstructured/ingest/connector/notion/types/database_properties/formula.py b/src/unstructured/ingest/connector/notion/types/database_properties/formula.py
deleted file mode 100644
index b192136..0000000
--- a/src/unstructured/ingest/connector/notion/types/database_properties/formula.py
+++ /dev/null
@@ -1,49 +0,0 @@
-# https://developers.notion.com/reference/property-object#formula
-from dataclasses import dataclass
-from typing import Optional
-
-from htmlBuilder.tags import Div, HtmlTag
-
-from unstructured.ingest.connector.notion.interfaces import (
-    DBCellBase,
-    DBPropertyBase,
-    FromJSONMixin,
-)
-
-
-@dataclass
-class FormulaProp(FromJSONMixin):
-    expression: str
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(**data)
-
-
-@dataclass
-class Formula(DBPropertyBase):
-    id: str
-    name: str
-    formula: FormulaProp
-    type: str = "formula"
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(formula=FormulaProp.from_dict(data.pop("formula", {})), **data)
-
-
-@dataclass
-class FormulaCell(DBCellBase):
-    id: str
-    formula: dict
-    type: str = "formula"
-    name: Optional[str] = None
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(**data)
-
-    def get_html(self) -> Optional[HtmlTag]:
-        formula = self.formula
-        t = formula.get("type")
-        return Div([], str(formula[t]))
diff --git a/src/unstructured/ingest/connector/notion/types/database_properties/last_edited_by.py b/src/unstructured/ingest/connector/notion/types/database_properties/last_edited_by.py
deleted file mode 100644
index a1a2d0a..0000000
--- a/src/unstructured/ingest/connector/notion/types/database_properties/last_edited_by.py
+++ /dev/null
@@ -1,34 +0,0 @@
-# https://developers.notion.com/reference/property-object#last-edited-by
-from dataclasses import dataclass
-from typing import Optional
-
-from htmlBuilder.tags import HtmlTag
-
-from unstructured.ingest.connector.notion.interfaces import DBCellBase, DBPropertyBase
-from unstructured.ingest.connector.notion.types.user import People
-
-
-@dataclass
-class LastEditedBy(DBPropertyBase):
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls()
-
-    def get_text(self) -> Optional[str]:
-        return None
-
-
-@dataclass
-class LastEditedByCell(DBCellBase):
-    id: str
-    last_edited_by: People
-    type: str = "last_edited_by"
-
-    name: Optional[str] = None
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(last_edited_by=People.from_dict(data.pop("last_edited_by", {})), **data)
-
-    def get_html(self) -> Optional[HtmlTag]:
-        return self.last_edited_by.get_html()
diff --git a/src/unstructured/ingest/connector/notion/types/database_properties/last_edited_time.py b/src/unstructured/ingest/connector/notion/types/database_properties/last_edited_time.py
deleted file mode 100644
index 4c9e009..0000000
--- a/src/unstructured/ingest/connector/notion/types/database_properties/last_edited_time.py
+++ /dev/null
@@ -1,34 +0,0 @@
-# https://developers.notion.com/reference/property-object#last-edited-time
-from dataclasses import dataclass, field
-from typing import Optional
-
-from htmlBuilder.tags import Div, HtmlTag
-
-from unstructured.ingest.connector.notion.interfaces import DBCellBase, DBPropertyBase
-
-
-@dataclass
-class LastEditedTime(DBPropertyBase):
-    id: str
-    name: str
-    type: str = "last_edited_time"
-    last_edited_time: dict = field(default_factory=dict)
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(**data)
-
-
-@dataclass
-class LastEditedTimeCell(DBCellBase):
-    id: str
-    last_edited_time: str
-    type: str = "last_edited_time"
-    name: Optional[str] = None
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(**data)
-
-    def get_html(self) -> Optional[HtmlTag]:
-        return Div([], self.last_edited_time)
diff --git a/src/unstructured/ingest/connector/notion/types/database_properties/multiselect.py b/src/unstructured/ingest/connector/notion/types/database_properties/multiselect.py
deleted file mode 100644
index 7534ab8..0000000
--- a/src/unstructured/ingest/connector/notion/types/database_properties/multiselect.py
+++ /dev/null
@@ -1,73 +0,0 @@
-# https://developers.notion.com/reference/property-object#multi-select
-from dataclasses import dataclass, field
-from typing import List, Optional
-
-from htmlBuilder.attributes import Style
-from htmlBuilder.tags import Div, HtmlTag, Span
-
-from unstructured.ingest.connector.notion.interfaces import (
-    DBCellBase,
-    DBPropertyBase,
-    FromJSONMixin,
-)
-
-
-@dataclass
-class MultiSelectOption(FromJSONMixin):
-    color: str
-    id: str
-    name: str
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(**data)
-
-
-@dataclass
-class MultiSelectProp(FromJSONMixin):
-    options: List[MultiSelectOption] = field(default_factory=list)
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(options=[MultiSelectOption.from_dict(o) for o in data.get("options", [])])
-
-
-@dataclass
-class MultiSelect(DBPropertyBase):
-    id: str
-    name: str
-    multi_select: MultiSelectProp
-    type: str = "multi_select"
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(
-            multi_select=data.pop("multi_select", {}),
-            **data,
-        )
-
-
-@dataclass
-class MultiSelectCell(DBCellBase):
-    id: str
-    multi_select: List[MultiSelectOption]
-    type: str = "multi_select"
-    name: Optional[str] = None
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(
-            multi_select=[MultiSelectOption.from_dict(o) for o in data.pop("multi_select", [])],
-            **data,
-        )
-
-    def get_html(self) -> Optional[HtmlTag]:
-        if not self.multi_select:
-            return None
-        option_spans = []
-        for option in self.multi_select:
-            option_attributes = []
-            if option.color and option.color != "default":
-                option_attributes.append(Style(f"color: {option.color}"))
-            option_spans.append(Span(option_attributes, option.name))
-        return Div([], option_spans)
diff --git a/src/unstructured/ingest/connector/notion/types/database_properties/number.py b/src/unstructured/ingest/connector/notion/types/database_properties/number.py
deleted file mode 100644
index 599981f..0000000
--- a/src/unstructured/ingest/connector/notion/types/database_properties/number.py
+++ /dev/null
@@ -1,49 +0,0 @@
-# https://developers.notion.com/reference/property-object#number
-from dataclasses import dataclass
-from typing import Optional
-
-from htmlBuilder.tags import Div, HtmlTag
-
-from unstructured.ingest.connector.notion.interfaces import (
-    DBCellBase,
-    DBPropertyBase,
-    FromJSONMixin,
-)
-
-
-@dataclass
-class NumberProp(FromJSONMixin):
-    format: str
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(**data)
-
-
-@dataclass
-class Number(DBPropertyBase):
-    id: str
-    name: str
-    number: NumberProp
-    type: str = "number"
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(number=NumberProp.from_dict(data.pop("number")), **data)
-
-
-@dataclass
-class NumberCell(DBCellBase):
-    id: str
-    number: Optional[int] = None
-    type: str = "number"
-    name: Optional[str] = None
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(**data)
-
-    def get_html(self) -> Optional[HtmlTag]:
-        if number := self.number:
-            return Div([], str(number))
-        return None
diff --git a/src/unstructured/ingest/connector/notion/types/database_properties/people.py b/src/unstructured/ingest/connector/notion/types/database_properties/people.py
deleted file mode 100644
index 44e66b2..0000000
--- a/src/unstructured/ingest/connector/notion/types/database_properties/people.py
+++ /dev/null
@@ -1,40 +0,0 @@
-# https://developers.notion.com/reference/property-object#people
-from dataclasses import dataclass, field
-from typing import List, Optional
-
-from htmlBuilder.tags import Div, HtmlTag, Span
-
-from unstructured.ingest.connector.notion.interfaces import DBCellBase, DBPropertyBase
-from unstructured.ingest.connector.notion.types.user import People as PeopleType
-
-
-@dataclass
-class People(DBPropertyBase):
-    id: str
-    name: str
-    type: str = "people"
-    people: dict = field(default_factory=dict)
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(**data)
-
-
-@dataclass
-class PeopleCell(DBCellBase):
-    id: str
-    people: List[PeopleType]
-    type: str = "people"
-    name: Optional[str] = None
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(people=[PeopleType.from_dict(p) for p in data.pop("people", {})], **data)
-
-    def get_html(self) -> Optional[HtmlTag]:
-        if not self.people:
-            return None
-        people_spans = []
-        for person in self.people:
-            people_spans.append(Span([], person.get_html()))
-        return Div([], people_spans)
diff --git a/src/unstructured/ingest/connector/notion/types/database_properties/phone_number.py b/src/unstructured/ingest/connector/notion/types/database_properties/phone_number.py
deleted file mode 100644
index 58a5c91..0000000
--- a/src/unstructured/ingest/connector/notion/types/database_properties/phone_number.py
+++ /dev/null
@@ -1,36 +0,0 @@
-# https://developers.notion.com/reference/property-object#phone-number
-from dataclasses import dataclass, field
-from typing import Optional
-
-from htmlBuilder.tags import Div, HtmlTag
-
-from unstructured.ingest.connector.notion.interfaces import DBCellBase, DBPropertyBase
-
-
-@dataclass
-class PhoneNumber(DBPropertyBase):
-    id: str
-    name: str
-    type: str = "phone_number"
-    phone_number: dict = field(default_factory=dict)
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(**data)
-
-
-@dataclass
-class PhoneNumberCell(DBCellBase):
-    id: str
-    phone_number: Optional[str]
-    name: Optional[str] = None
-    type: str = "phone_number"
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(**data)
-
-    def get_html(self) -> Optional[HtmlTag]:
-        if phone_number := self.phone_number:
-            return Div([], phone_number)
-        return None
diff --git a/src/unstructured/ingest/connector/notion/types/database_properties/relation.py b/src/unstructured/ingest/connector/notion/types/database_properties/relation.py
deleted file mode 100644
index 35c283a..0000000
--- a/src/unstructured/ingest/connector/notion/types/database_properties/relation.py
+++ /dev/null
@@ -1,67 +0,0 @@
-# https://developers.notion.com/reference/property-object#relation
-from dataclasses import dataclass
-from typing import Optional
-from urllib.parse import unquote
-
-from htmlBuilder.tags import Div, HtmlTag
-
-from unstructured.ingest.connector.notion.interfaces import (
-    DBCellBase,
-    DBPropertyBase,
-    FromJSONMixin,
-)
-
-
-@dataclass
-class DualProperty(FromJSONMixin):
-    synced_property_id: str
-    synced_property_name: str
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(**data)
-
-
-@dataclass
-class RelationProp(FromJSONMixin):
-    database_id: str
-    type: str
-    dual_property: DualProperty
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        t = data.get("type")
-        if t == "dual_property":
-            dual_property = DualProperty.from_dict(data.pop(t))
-        else:
-            raise ValueError(f"{t} type not recognized")
-
-        return cls(dual_property=dual_property, **data)
-
-
-@dataclass
-class Relation(DBPropertyBase):
-    id: str
-    name: str
-    relation: RelationProp
-    type: str = "relation"
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(relation=RelationProp.from_dict(data.pop("relation")), **data)
-
-
-@dataclass
-class RelationCell(DBCellBase):
-    id: str
-    has_more: bool
-    relation: list
-    type: str = "relation"
-    name: Optional[str] = None
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(**data)
-
-    def get_html(self) -> Optional[HtmlTag]:
-        return Div([], unquote(self.id))
diff --git a/src/unstructured/ingest/connector/notion/types/database_properties/rich_text.py b/src/unstructured/ingest/connector/notion/types/database_properties/rich_text.py
deleted file mode 100644
index 2bd56c2..0000000
--- a/src/unstructured/ingest/connector/notion/types/database_properties/rich_text.py
+++ /dev/null
@@ -1,43 +0,0 @@
-# https://developers.notion.com/reference/property-object#rich-text
-from dataclasses import dataclass, field
-from typing import List, Optional
-
-from htmlBuilder.tags import Div, HtmlTag, Span
-
-from unstructured.ingest.connector.notion.interfaces import DBCellBase, DBPropertyBase
-from unstructured.ingest.connector.notion.types.rich_text import (
-    RichText as RichTextType,
-)
-
-
-@dataclass
-class RichText(DBPropertyBase):
-    id: str
-    name: str
-    type: str = "rich_text"
-    rich_text: dict = field(default_factory=dict)
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(**data)
-
-
-@dataclass
-class RichTextCell(DBCellBase):
-    id: str
-    rich_text: List[RichTextType]
-    name: Optional[str] = None
-    type: str = "rich_text"
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(
-            rich_text=[RichTextType.from_dict(rt) for rt in data.pop("rich_text", [])],
-            **data,
-        )
-
-    def get_html(self) -> Optional[HtmlTag]:
-        if not self.rich_text:
-            return None
-        spans = [Span([], rt.get_html()) for rt in self.rich_text]
-        return Div([], spans)
diff --git a/src/unstructured/ingest/connector/notion/types/database_properties/rollup.py b/src/unstructured/ingest/connector/notion/types/database_properties/rollup.py
deleted file mode 100644
index 5134b40..0000000
--- a/src/unstructured/ingest/connector/notion/types/database_properties/rollup.py
+++ /dev/null
@@ -1,56 +0,0 @@
-# https://developers.notion.com/reference/property-object#rollup
-from dataclasses import dataclass
-from typing import Optional
-
-from htmlBuilder.tags import Div, HtmlTag, Span
-
-from unstructured.ingest.connector.notion.interfaces import (
-    DBCellBase,
-    DBPropertyBase,
-    FromJSONMixin,
-)
-
-
-@dataclass
-class RollupProp(FromJSONMixin):
-    function: str
-    relation_property_id: str
-    relation_property_name: str
-    rollup_property_id: str
-    rollup_property_name: str
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(**data)
-
-
-@dataclass
-class Rollup(DBPropertyBase):
-    id: str
-    name: str
-    rollup: RollupProp
-    type: str = "rollup"
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(rollup=RollupProp.from_dict(data.pop("rollup")), **data)
-
-
-@dataclass
-class RollupCell(DBCellBase):
-    id: str
-    rollup: dict
-    type: str = "rollup"
-    name: Optional[str] = None
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(**data)
-
-    def get_html(self) -> Optional[HtmlTag]:
-        rollup = self.rollup
-        t = rollup.get("type")
-        v = rollup[t]
-        if isinstance(v, list):
-            return Div([], [Span([], str(x)) for x in v])
-        return Div([], str(v))
diff --git a/src/unstructured/ingest/connector/notion/types/database_properties/select.py b/src/unstructured/ingest/connector/notion/types/database_properties/select.py
deleted file mode 100644
index 550f2ff..0000000
--- a/src/unstructured/ingest/connector/notion/types/database_properties/select.py
+++ /dev/null
@@ -1,68 +0,0 @@
-# https://developers.notion.com/reference/property-object#select
-from dataclasses import dataclass, field
-from typing import List, Optional
-
-from htmlBuilder.attributes import Style
-from htmlBuilder.tags import Div, HtmlTag
-
-from unstructured.ingest.connector.notion.interfaces import (
-    DBCellBase,
-    DBPropertyBase,
-    FromJSONMixin,
-)
-
-
-@dataclass
-class SelectOption(FromJSONMixin):
-    color: str
-    id: str
-    name: str
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(**data)
-
-
-@dataclass
-class SelectProp(FromJSONMixin):
-    options: List[SelectOption] = field(default_factory=list)
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(options=[SelectOption.from_dict(o) for o in data.get("options", [])])
-
-
-@dataclass
-class Select(DBPropertyBase):
-    id: str
-    name: str
-    select: SelectProp
-    type: str = "select"
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(select=SelectProp.from_dict(data.pop("select", {})), **data)
-
-
-@dataclass
-class SelectCell(DBCellBase):
-    id: str
-    select: Optional[SelectOption]
-    type: str = "select"
-    name: Optional[str] = None
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        select_data = data.pop("select")
-        select = None
-        if select_data:
-            select = SelectOption.from_dict(select_data)
-        return cls(select=select, **data)
-
-    def get_html(self) -> Optional[HtmlTag]:
-        if select := self.select:
-            select_attr = []
-            if select.color and select.color != "default":
-                select_attr.append(Style(f"color: {select.color}"))
-            return Div(select_attr, select.name)
-        return None
diff --git a/src/unstructured/ingest/connector/notion/types/database_properties/status.py b/src/unstructured/ingest/connector/notion/types/database_properties/status.py
deleted file mode 100644
index 8139b98..0000000
--- a/src/unstructured/ingest/connector/notion/types/database_properties/status.py
+++ /dev/null
@@ -1,80 +0,0 @@
-# https://developers.notion.com/reference/property-object#status
-from dataclasses import dataclass, field
-from typing import List, Optional
-
-from htmlBuilder.attributes import Style
-from htmlBuilder.tags import Div, HtmlTag
-
-from unstructured.ingest.connector.notion.interfaces import (
-    DBCellBase,
-    DBPropertyBase,
-    FromJSONMixin,
-)
-
-
-@dataclass
-class StatusOption(FromJSONMixin):
-    color: str
-    id: str
-    name: str
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(**data)
-
-
-@dataclass
-class StatusGroup(FromJSONMixin):
-    color: str
-    id: str
-    name: str
-    option_ids: List[str] = field(default_factory=List[str])
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(**data)
-
-
-@dataclass
-class StatusProp(FromJSONMixin):
-    options: List[StatusOption] = field(default_factory=list)
-    groups: List[StatusGroup] = field(default_factory=list)
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(
-            options=[StatusOption.from_dict(o) for o in data.get("options", [])],
-            groups=[StatusGroup.from_dict(g) for g in data.get("groups", [])],
-        )
-
-
-@dataclass
-class Status(DBPropertyBase):
-    id: str
-    name: str
-    status: StatusProp
-    type: str = "status"
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(status=StatusProp.from_dict(data.pop("status", {})), **data)
-
-
-@dataclass
-class StatusCell(DBCellBase):
-    id: str
-    status: Optional[StatusOption]
-    type: str = "status"
-    name: Optional[str] = None
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(status=StatusOption.from_dict(data.pop("status", {})), **data)
-
-    def get_html(self) -> Optional[HtmlTag]:
-        if status := self.status:
-            select_attr = []
-            if status.color and status.color != "default":
-                select_attr.append(Style(f"color: {status.color}"))
-            return Div(select_attr, status.name)
-        return None
diff --git a/src/unstructured/ingest/connector/notion/types/database_properties/title.py b/src/unstructured/ingest/connector/notion/types/database_properties/title.py
deleted file mode 100644
index aaee0e6..0000000
--- a/src/unstructured/ingest/connector/notion/types/database_properties/title.py
+++ /dev/null
@@ -1,37 +0,0 @@
-# https://developers.notion.com/reference/property-object#title
-from dataclasses import dataclass, field
-from typing import List, Optional
-
-from htmlBuilder.tags import Div, HtmlTag
-
-from unstructured.ingest.connector.notion.interfaces import DBCellBase, DBPropertyBase
-from unstructured.ingest.connector.notion.types.rich_text import RichText
-
-
-@dataclass
-class Title(DBPropertyBase):
-    id: str
-    name: str
-    type: str = "title"
-    title: dict = field(default_factory=dict)
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(**data)
-
-
-@dataclass
-class TitleCell(DBCellBase):
-    id: str
-    title: List[RichText]
-    type: str = "title"
-    name: Optional[str] = None
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(title=[RichText.from_dict(rt) for rt in data.pop("title", [])], **data)
-
-    def get_html(self) -> Optional[HtmlTag]:
-        if not self.title:
-            return None
-        return Div([], [rt.get_html() for rt in self.title])
diff --git a/src/unstructured/ingest/connector/notion/types/database_properties/unique_id.py b/src/unstructured/ingest/connector/notion/types/database_properties/unique_id.py
deleted file mode 100644
index 643f2c0..0000000
--- a/src/unstructured/ingest/connector/notion/types/database_properties/unique_id.py
+++ /dev/null
@@ -1,50 +0,0 @@
-# https://developers.notion.com/reference/property-object#title
-from dataclasses import dataclass, field
-from typing import Optional
-
-from htmlBuilder.tags import Div, HtmlTag
-
-from unstructured.ingest.connector.notion.interfaces import (
-    DBCellBase,
-    DBPropertyBase,
-    FromJSONMixin,
-)
-
-
-@dataclass
-class UniqueID(DBPropertyBase):
-    id: str
-    name: str
-    type: str = "unique_id"
-    unique_id: dict = field(default_factory=dict)
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(**data)
-
-
-@dataclass
-class UniqueIDCellData(FromJSONMixin):
-    prefix: str
-    number: int
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(**data)
-
-
-@dataclass
-class UniqueIDCell(DBCellBase):
-    id: str
-    unique_id: Optional[UniqueIDCellData]
-    type: str = "title"
-    name: Optional[str] = None
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(unique_id=UniqueIDCellData.from_dict(data.pop("unique_id")), **data)
-
-    def get_html(self) -> Optional[HtmlTag]:
-        if unique_id := self.unique_id:
-            return Div([], f"{unique_id.prefix}-{unique_id.number}")
-        return None
diff --git a/src/unstructured/ingest/connector/notion/types/database_properties/url.py b/src/unstructured/ingest/connector/notion/types/database_properties/url.py
deleted file mode 100644
index 8233ae9..0000000
--- a/src/unstructured/ingest/connector/notion/types/database_properties/url.py
+++ /dev/null
@@ -1,37 +0,0 @@
-# https://developers.notion.com/reference/property-object#url
-from dataclasses import dataclass, field
-from typing import Optional
-
-from htmlBuilder.attributes import Href
-from htmlBuilder.tags import A, HtmlTag
-
-from unstructured.ingest.connector.notion.interfaces import DBCellBase, DBPropertyBase
-
-
-@dataclass
-class URL(DBPropertyBase):
-    id: str
-    name: str
-    type: str = "url"
-    url: dict = field(default_factory=dict)
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(**data)
-
-
-@dataclass
-class URLCell(DBCellBase):
-    id: str
-    url: Optional[str] = None
-    name: Optional[str] = None
-    type: str = "url"
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(**data)
-
-    def get_html(self) -> Optional[HtmlTag]:
-        if url := self.url:
-            return A([Href(url)], url)
-        return None
diff --git a/src/unstructured/ingest/connector/notion/types/database_properties/verification.py b/src/unstructured/ingest/connector/notion/types/database_properties/verification.py
deleted file mode 100644
index 03ade8e..0000000
--- a/src/unstructured/ingest/connector/notion/types/database_properties/verification.py
+++ /dev/null
@@ -1,78 +0,0 @@
-# https://developers.notion.com/reference/property-object#url
-from dataclasses import dataclass, field
-from typing import Optional
-
-from htmlBuilder.tags import Div, HtmlTag, Span
-
-from unstructured.ingest.connector.notion.interfaces import (
-    DBCellBase,
-    DBPropertyBase,
-    FromJSONMixin,
-    GetHTMLMixin,
-)
-from unstructured.ingest.connector.notion.types.date import Date
-from unstructured.ingest.connector.notion.types.user import People
-
-
-@dataclass
-class Verification(DBPropertyBase):
-    id: str
-    name: str
-    type: str = "verification"
-    verification: dict = field(default_factory=dict)
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(**data)
-
-
-@dataclass
-class VerificationData(FromJSONMixin, GetHTMLMixin):
-    state: Optional[str]
-    verified_by: Optional[People]
-    date: Optional[Date]
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        verified_by = data.pop("verified_by", None)
-        date = data.pop("date", None)
-        return cls(
-            verified_by=People.from_dict(data=verified_by) if verified_by else None,
-            date=Date.from_dict(data=date) if date else None,
-            **data,
-        )
-
-    def get_html(self) -> Optional[HtmlTag]:
-        elements = []
-        if state := self.state:
-            elements.append(Span([], state))
-        if (verified_by := self.verified_by) and (verified_by_html := verified_by.get_html()):
-            elements.append(verified_by_html)
-        if (date := self.date) and (date_html := date.get_html()):
-            elements.append(date_html)
-        if elements:
-            return Div([], elements)
-        return None
-
-
-@dataclass
-class VerificationCell(DBCellBase):
-    id: str
-    verification: Optional[VerificationData]
-    name: Optional[str] = None
-    type: str = "verification"
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(verification=VerificationData.from_dict(data.pop("verification")), **data)
-
-    def get_html(self) -> Optional[HtmlTag]:
-        elements = []
-        if name := self.name:
-            elements.append(Span([], name))
-        if (verification := self.verification) and (verification_html := verification.get_html()):
-            elements.append(verification_html)
-
-        if elements:
-            return Div([], elements)
-        return None
diff --git a/src/unstructured/ingest/connector/notion/types/date.py b/src/unstructured/ingest/connector/notion/types/date.py
deleted file mode 100644
index 7c6dcf1..0000000
--- a/src/unstructured/ingest/connector/notion/types/date.py
+++ /dev/null
@@ -1,26 +0,0 @@
-# https://developers.notion.com/reference/property-value-object#date-property-values
-from dataclasses import dataclass
-from typing import Optional
-
-from htmlBuilder.tags import Div, HtmlTag
-
-from unstructured.ingest.connector.notion.interfaces import FromJSONMixin, GetHTMLMixin
-
-
-@dataclass
-class Date(FromJSONMixin, GetHTMLMixin):
-    start: str
-    end: Optional[str] = None
-    time_zone: Optional[str] = None
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(**data)
-
-    def get_html(self) -> Optional[HtmlTag]:
-        text = f"{self.start}"
-        if end := self.end:
-            text += f" - {end}"
-        if self.time_zone:
-            text += f" {self.time_zone}"
-        return Div([], text)
diff --git a/src/unstructured/ingest/connector/notion/types/file.py b/src/unstructured/ingest/connector/notion/types/file.py
deleted file mode 100644
index 6ade2d1..0000000
--- a/src/unstructured/ingest/connector/notion/types/file.py
+++ /dev/null
@@ -1,51 +0,0 @@
-# https://developers.notion.com/reference/file-object
-from dataclasses import dataclass
-from typing import Optional
-
-from htmlBuilder.attributes import Href
-from htmlBuilder.tags import A, HtmlTag
-
-from unstructured.ingest.connector.notion.interfaces import FromJSONMixin, GetHTMLMixin
-
-
-@dataclass
-class External(FromJSONMixin):
-    url: str
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(**data)
-
-
-@dataclass
-class File(FromJSONMixin):
-    url: str
-    expiry_time: str
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(**data)
-
-
-@dataclass
-class FileObject(FromJSONMixin, GetHTMLMixin):
-    type: str
-    external: Optional[External] = None
-    file: Optional[File] = None
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        t = data["type"]
-        file_object = cls(type=t)
-        if t == "external":
-            file_object.external = External.from_dict(data["external"])
-        elif t == "file":
-            file_object.file = File.from_dict(data["file"])
-        return file_object
-
-    def get_html(self) -> Optional[HtmlTag]:
-        if self.file:
-            return A([Href(self.file.url)], self.file.url)
-        if self.external:
-            return A([Href(self.external.url)], self.external.url)
-        return None
diff --git a/src/unstructured/ingest/connector/notion/types/page.py b/src/unstructured/ingest/connector/notion/types/page.py
deleted file mode 100644
index 42bbb29..0000000
--- a/src/unstructured/ingest/connector/notion/types/page.py
+++ /dev/null
@@ -1,43 +0,0 @@
-# https://developers.notion.com/reference/page
-from dataclasses import dataclass
-from typing import Optional
-
-from unstructured.ingest.connector.notion.interfaces import FromJSONMixin
-from unstructured.ingest.connector.notion.types.file import FileObject
-from unstructured.ingest.connector.notion.types.parent import Parent
-from unstructured.ingest.connector.notion.types.user import PartialUser
-
-
-@dataclass
-class Page(FromJSONMixin):
-    id: str
-    created_time: str
-    created_by: PartialUser
-    last_edited_time: str
-    last_edited_by: PartialUser
-    archived: bool
-    properties: dict
-    parent: Parent
-    url: str
-    public_url: str
-    object: str = "page"
-    icon: Optional[FileObject] = None
-    cover: Optional[FileObject] = None
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        created_by = data.pop("created_by")
-        last_edited_by = data.pop("last_edited_by")
-        icon = data.pop("icon")
-        cover = data.pop("cover")
-        parent = data.pop("parent")
-        page = cls(
-            created_by=PartialUser.from_dict(created_by),
-            last_edited_by=PartialUser.from_dict(last_edited_by),
-            icon=FileObject.from_dict(icon) if icon else None,
-            cover=FileObject.from_dict(cover) if cover else None,
-            parent=Parent.from_dict(parent),
-            **data,
-        )
-
-        return page
diff --git a/src/unstructured/ingest/connector/notion/types/parent.py b/src/unstructured/ingest/connector/notion/types/parent.py
deleted file mode 100644
index f78c166..0000000
--- a/src/unstructured/ingest/connector/notion/types/parent.py
+++ /dev/null
@@ -1,66 +0,0 @@
-# https://developers.notion.com/reference/parent-object
-from dataclasses import dataclass
-
-from unstructured.ingest.connector.notion.interfaces import FromJSONMixin
-
-
-# https://developers.notion.com/reference/parent-object#database-parent
-@dataclass
-class DatabaseParent(FromJSONMixin):
-    database_id: str
-    type: str = "database_id"
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(database_id=data["database_id"])
-
-
-# https://developers.notion.com/reference/parent-object#page-parent
-@dataclass
-class PageParent(FromJSONMixin):
-    page_id: str
-    type: str = "page_id"
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(page_id=data["page_id"])
-
-
-# https://developers.notion.com/reference/parent-object#workspace-parent
-@dataclass
-class WorkspaceParent(FromJSONMixin):
-    type: str = "workspace"
-    workspace: bool = True
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls()
-
-
-# https://developers.notion.com/reference/parent-object#block-parent
-@dataclass
-class BlockParent(FromJSONMixin):
-    block_id: str
-    type: str = "block_id"
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(block_id=data["block_id"])
-
-
-@dataclass
-class Parent(FromJSONMixin):
-    block_id: str
-    type: str = "block_id"
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        t = data["type"]
-        if t == "database_id":
-            return DatabaseParent.from_dict(data)
-        elif t == "page_id":
-            return PageParent.from_dict(data)
-        elif t == "workspace":
-            return WorkspaceParent.from_dict(data)
-        elif t == "block_id":
-            return BlockParent.from_dict(data)
diff --git a/src/unstructured/ingest/connector/notion/types/rich_text.py b/src/unstructured/ingest/connector/notion/types/rich_text.py
deleted file mode 100644
index ae71a0a..0000000
--- a/src/unstructured/ingest/connector/notion/types/rich_text.py
+++ /dev/null
@@ -1,189 +0,0 @@
-# https://developers.notion.com/reference/rich-text
-from dataclasses import dataclass
-from typing import Optional
-
-from htmlBuilder.attributes import Href, Style
-from htmlBuilder.tags import A, B, Code, Div, HtmlTag, I, S, Span, U
-from htmlBuilder.tags import Text as HtmlText
-
-from unstructured.ingest.connector.notion.interfaces import (
-    FromJSONMixin,
-    GetHTMLMixin,
-)
-from unstructured.ingest.connector.notion.types.date import Date
-from unstructured.ingest.connector.notion.types.user import People
-
-
-@dataclass
-class Annotations(FromJSONMixin):
-    bold: bool
-    code: bool
-    italic: bool
-    strikethrough: bool
-    underline: bool
-    color: str
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(**data)
-
-
-@dataclass
-class Equation(FromJSONMixin, GetHTMLMixin):
-    expression: str
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(**data)
-
-    def get_html(self) -> Optional[HtmlTag]:
-        return Code([], self.expression) if self.expression else None
-
-
-@dataclass
-class MentionDatabase(FromJSONMixin, GetHTMLMixin):
-    id: str
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(**data)
-
-    def get_html(self) -> Optional[HtmlTag]:
-        return Div([], self.id) if self.id else None
-
-
-@dataclass
-class MentionLinkPreview(FromJSONMixin, GetHTMLMixin):
-    url: str
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(**data)
-
-    def get_html(self) -> Optional[HtmlTag]:
-        return A([Href(self.url)], self.url) if self.url else None
-
-
-@dataclass
-class MentionPage(FromJSONMixin, GetHTMLMixin):
-    id: str
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(**data)
-
-    def get_html(self) -> Optional[HtmlTag]:
-        return Div([], self.id) if self.id else None
-
-
-@dataclass
-class MentionTemplate(FromJSONMixin):
-    template_mention_date: Optional[str]
-    template_mention_user: Optional[str]
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        return cls(**data)
-
-
-@dataclass
-class Mention(FromJSONMixin, GetHTMLMixin):
-    type: str
-    database: Optional[MentionDatabase] = None
-    date: Optional[Date] = None
-    link_preview: Optional[MentionLinkPreview] = None
-    page: Optional[MentionPage] = None
-    template_mention: Optional[MentionTemplate] = None
-    user: Optional[People] = None
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        t = data["type"]
-        mention = cls(type=t)
-        if t == "date":
-            mention.date = Date.from_dict(data["date"])
-        elif t == "database":
-            mention.database = MentionDatabase.from_dict(data["database"])
-        elif t == "link_preview":
-            mention.link_preview = MentionLinkPreview.from_dict(data["link_preview"])
-
elif t == "page": - mention.page = MentionPage.from_dict(data["page"]) - elif t == "template_mention": - mention.template_mention = MentionTemplate.from_dict(data["template_mention"]) - elif t == "user": - mention.user = People.from_dict(data["user"]) - - return mention - - def get_html(self) -> Optional[HtmlTag]: - t = self.type - if t == "date": - return self.date.get_html() if self.date else None - elif t == "database": - return self.database.get_html() if self.database else None - elif t == "link_preview": - return self.link_preview.get_html() if self.link_preview else None - elif t == "page": - return self.page.get_html() if self.page else None - elif t == "user": - return self.user.get_html() if self.user else None - return None - - -@dataclass -class Text(FromJSONMixin): - content: str - link: Optional[dict] - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - -@dataclass -class RichText(FromJSONMixin, GetHTMLMixin): - type: str - plain_text: str - annotations: Optional[Annotations] = None - href: Optional[str] = None - text: Optional[Text] = None - mention: Optional[Mention] = None - equation: Optional[Equation] = None - - def get_html(self) -> Optional[HtmlTag]: - text = HtmlText(self.plain_text) - if self.href: - text = A([Href(self.href)], text) - if self.annotations: - annotations = self.annotations - if annotations.bold: - text = B([], text) - if annotations.code: - text = Code([], text) - if annotations.italic: - text = I([], text) - if annotations.strikethrough: - text = S([], text) - if annotations.underline: - text = U([], text) - if annotations.color and annotations.color != "default": - if isinstance(text, HtmlText): - text = Span([], text) - text.attributes.append(Style(f"color:{annotations.color}")) - return text - - @classmethod - def from_dict(cls, data: dict): - t = data["type"] - rich_text = cls( - annotations=Annotations.from_dict(data.pop("annotations")), - **data, - ) - if t == "text": - rich_text.text = Text.from_dict(data["text"]) - elif t == "mention": - rich_text.mention = Mention.from_dict(data["mention"]) - elif t == "equation": - rich_text.equation = Equation.from_dict(data["equation"]) - - return rich_text diff --git a/src/unstructured/ingest/connector/notion/types/user.py b/src/unstructured/ingest/connector/notion/types/user.py deleted file mode 100644 index 4574c0b..0000000 --- a/src/unstructured/ingest/connector/notion/types/user.py +++ /dev/null @@ -1,76 +0,0 @@ -# https://developers.notion.com/reference/user -from dataclasses import dataclass, field -from typing import Optional - -from htmlBuilder.attributes import Href -from htmlBuilder.tags import A, Div, HtmlTag - -from unstructured.ingest.connector.notion.interfaces import FromJSONMixin, GetHTMLMixin - - -@dataclass -class PartialUser(FromJSONMixin): - id: str - object: str = "user" - - @classmethod - def from_dict(cls, data: dict): - return cls(id=data["id"]) - - -@dataclass -class User(FromJSONMixin, GetHTMLMixin): - object: dict - id: str - type: Optional[str] = None - name: Optional[str] = None - avatar_url: Optional[str] = None - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - def get_text(self) -> Optional[str]: - text = self.name - if self.avatar_url: - text = f"[{text}]({self.avatar_url})" - return text - - def get_html(self) -> Optional[HtmlTag]: - if self.avatar_url: - return A([Href(self.avatar_url)], self.name) - else: - return Div([], self.name) - - -@dataclass -class People(User): - person: dict = field(default_factory=dict) - - 
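The RichText.get_html() method removed above composes nested inline markup by starting from a plain HtmlText node and wrapping it once per active annotation flag; a non-default color is then attached as an inline style on whatever tag ended up outermost. A minimal sketch of that round trip, assuming htmlBuilder tags expose a render() method and using a hand-built payload (the dict values and the rendered output in the comment are illustrative, not taken from this diff):

    from unstructured.ingest.connector.notion.types.rich_text import RichText

    payload = {
        "type": "text",
        "plain_text": "hello",
        "href": None,
        "annotations": {
            "bold": True,
            "code": False,
            "italic": False,
            "strikethrough": False,
            "underline": False,
            "color": "red",
        },
        "text": {"content": "hello", "link": None},
    }

    # bold=True wraps the text node in <b>, and color="red" appends an inline
    # style to the outermost tag, so this should render along the lines of
    # <b style="color:red">hello</b>
    print(RichText.from_dict(payload).get_html().render())

Because the bold wrap is applied before the code wrap, a run with both flags set stacks as <code><b>…</b></code> rather than the reverse.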
-@dataclass -class Bots(FromJSONMixin, GetHTMLMixin): - object: dict - id: str - bot: dict - owner: dict - type: str - workspace_name: str - name: Optional[str] = None - avatar_url: Optional[str] = None - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - def get_text(self) -> Optional[str]: - text = self.name - if self.avatar_url: - text = f"[{text}]({self.avatar_url})" - return text - - def get_html(self) -> Optional[HtmlTag]: - if self.avatar_url: - return A([Href(self.avatar_url)], self.name) - else: - return Div([], self.name) diff --git a/src/unstructured/ingest/connector/onedrive.py b/src/unstructured/ingest/connector/onedrive.py deleted file mode 100644 index 5f40ee0..0000000 --- a/src/unstructured/ingest/connector/onedrive.py +++ /dev/null @@ -1,158 +0,0 @@ -from dataclasses import dataclass, field -from pathlib import Path -from typing import TYPE_CHECKING, List, Optional - -from unstructured.file_utils.filetype import EXT_TO_FILETYPE -from unstructured.ingest.interfaces import ( - BaseConnector, - BaseConnectorConfig, - BaseIngestDoc, - ConnectorCleanupMixin, - IngestDocCleanupMixin, - StandardConnectorConfig, -) -from unstructured.ingest.logger import logger -from unstructured.utils import requires_dependencies - -if TYPE_CHECKING: - from office365.onedrive.driveitems.driveItem import DriveItem - -MAX_MB_SIZE = 512_000_000 - - -@dataclass -class SimpleOneDriveConfig(BaseConnectorConfig): - client_id: str - client_credential: str = field(repr=False) - user_pname: str - tenant: str = field(repr=False) - authority_url: Optional[str] = field(repr=False) - path: Optional[str] = field(default="") - recursive: bool = False - - def __post_init__(self): - if not (self.client_id and self.client_credential and self.user_pname): - raise ValueError( - "Please provide all the following mandatory values:" - "\n-ms-client_id\n-ms-client_cred\n-ms-user-pname", - ) - self.token_factory = self._acquire_token - - @requires_dependencies(["msal"]) - def _acquire_token(self): - from msal import ConfidentialClientApplication - - try: - app = ConfidentialClientApplication( - authority=f"{self.authority_url}/{self.tenant}", - client_id=self.client_id, - client_credential=self.client_credential, - ) - token = app.acquire_token_for_client(scopes=["https://graph.microsoft.com/.default"]) - except ValueError as exc: - logger.error("Couldn't set up credentials for OneDrive") - raise exc - return token - - -@dataclass -class OneDriveIngestDoc(IngestDocCleanupMixin, BaseIngestDoc): - config: SimpleOneDriveConfig - file: "DriveItem" - - def __post_init__(self): - self.ext = "".join(Path(self.file.name).suffixes) - if not self.ext: - raise ValueError("Unsupported file without extension.") - - if self.ext not in EXT_TO_FILETYPE: - raise ValueError( - f"Extension not supported. 
" - f"Value MUST be one of {', '.join([k for k in EXT_TO_FILETYPE if k is not None])}.", - ) - self._set_download_paths() - - def _set_download_paths(self) -> None: - """Parses the folder structure from the source and creates the download and output paths""" - download_path = Path(f"{self.standard_config.download_dir}") - output_path = Path(f"{self.standard_config.output_dir}") - - if parent_ref := self.file.get_property("parentReference", "").path.split(":")[-1]: - odir = parent_ref[1:] if parent_ref[0] == "/" else parent_ref - download_path = download_path if odir == "" else (download_path / odir).resolve() - output_path = output_path if odir == "" else (output_path / odir).resolve() - - self.download_dir = download_path - self.download_filepath = (download_path / self.file.name).resolve() - oname = f"{self.file.name[:-len(self.ext)]}.json" - self.output_dir = output_path - self.output_filepath = (output_path / oname).resolve() - - @property - def filename(self): - return Path(self.download_filepath).resolve() - - @property - def _output_filename(self): - return Path(self.output_filepath).resolve() - - @BaseIngestDoc.skip_if_file_exists - @requires_dependencies(["office365"]) - def get_file(self): - try: - fsize = self.file.get_property("size", 0) - self.output_dir.mkdir(parents=True, exist_ok=True) - - if not self.download_dir.is_dir(): - logger.debug(f"Creating directory: {self.download_dir}") - self.download_dir.mkdir(parents=True, exist_ok=True) - - if fsize > MAX_MB_SIZE: - logger.info(f"Downloading file with size: {fsize} bytes in chunks") - with self.filename.open(mode="wb") as f: - self.file.download_session(f, chunk_size=1024 * 1024 * 100).execute_query() - else: - with self.filename.open(mode="wb") as f: - self.file.download(f).execute_query() - except Exception as e: - logger.error(f"Error while downloading and saving file: {self.filename}.") - logger.error(e) - return - logger.info(f"File downloaded: {self.filename}") - return - - -class OneDriveConnector(ConnectorCleanupMixin, BaseConnector): - config: SimpleOneDriveConfig - - def __init__(self, standard_config: StandardConnectorConfig, config: SimpleOneDriveConfig): - super().__init__(standard_config, config) - self._set_client() - - @requires_dependencies(["office365"]) - def _set_client(self): - from office365.graph_client import GraphClient - - self.client = GraphClient(self.config.token_factory) - - def _list_objects(self, folder, recursive) -> List["DriveItem"]: - drive_items = folder.children.get().execute_query() - files = [d for d in drive_items if d.is_file] - if not recursive: - return files - folders = [d for d in drive_items if d.is_folder] - for f in folders: - files += self._list_objects(f, recursive) - return files - - def initialize(self): - pass - - def get_ingest_docs(self): - root = self.client.users[self.config.user_pname].drive.get().execute_query().root - if fpath := self.config.path: - root = root.get_by_path(fpath).get().execute_query() - if root is None or not root.is_folder: - raise ValueError(f"Unable to find directory, given: {fpath}") - files = self._list_objects(root, self.config.recursive) - return [OneDriveIngestDoc(self.standard_config, self.config, f) for f in files] diff --git a/src/unstructured/ingest/connector/outlook.py b/src/unstructured/ingest/connector/outlook.py deleted file mode 100644 index 1943f2a..0000000 --- a/src/unstructured/ingest/connector/outlook.py +++ /dev/null @@ -1,230 +0,0 @@ -import hashlib -import os -from collections import defaultdict -from dataclasses import 
dataclass, field -from itertools import chain -from pathlib import Path -from typing import List, Optional - -from office365.onedrive.driveitems.driveItem import DriveItem - -from unstructured.ingest.interfaces import ( - BaseConnector, - BaseConnectorConfig, - BaseIngestDoc, - ConnectorCleanupMixin, - IngestDocCleanupMixin, - StandardConnectorConfig, -) -from unstructured.ingest.logger import logger -from unstructured.utils import requires_dependencies - -MAX_NUM_EMAILS = 1000000 # Maximum number of emails per folder - - -class MissingFolderError(Exception): - """There are no root folders with those names.""" - - -@dataclass -class SimpleOutlookConfig(BaseConnectorConfig): - """This class is getting the token.""" - - client_id: Optional[str] - client_credential: Optional[str] = field(repr=False) - user_email: str - tenant: Optional[str] = field(repr=False) - authority_url: Optional[str] = field(repr=False) - ms_outlook_folders: List[str] - recursive: bool = False - - def __post_init__(self): - if not (self.client_id and self.client_credential and self.user_email): - raise ValueError( - "Please provide one of the following mandatory values:" - "\n--client_id\n--client_cred\n--user-email", - ) - self.token_factory = self._acquire_token - - @requires_dependencies(["msal"]) - def _acquire_token(self): - from msal import ConfidentialClientApplication - - try: - app = ConfidentialClientApplication( - authority=f"{self.authority_url}/{self.tenant}", - client_id=self.client_id, - client_credential=self.client_credential, - ) - token = app.acquire_token_for_client( - scopes=["https://graph.microsoft.com/.default"], - ) - except ValueError as exc: - logger.error("Couldn't set up credentials for Outlook") - raise exc - return token - - @staticmethod - def parse_folders(folder_str: str) -> List[str]: - """Parses a comma separated string of Outlook folders into a list.""" - return [x.strip() for x in folder_str.split(",")] - - -@dataclass -class OutlookIngestDoc(IngestDocCleanupMixin, BaseIngestDoc): - config: SimpleOutlookConfig - file: DriveItem - - def __post_init__(self): - self._set_download_paths() - - def hash_mail_name(self, id): - """Outlook email ids are 152 char long. 
Hash to shorten to 16.""" - return hashlib.sha256(id.encode("utf-8")).hexdigest()[:16] - - def _set_download_paths(self) -> None: - """Creates paths for downloading and parsing.""" - download_path = Path(f"{self.standard_config.download_dir}") - output_path = Path(f"{self.standard_config.output_dir}") - - self.download_dir = download_path - self.download_filepath = ( - download_path / f"{self.hash_mail_name(self.file.id)}.eml" - ).resolve() - oname = f"{self.hash_mail_name(self.file.id)}.eml.json" - self.output_dir = output_path - self.output_filepath = (output_path / oname).resolve() - - @property - def filename(self): - return Path(self.download_filepath).resolve() - - @property - def _output_filename(self): - return Path(self.output_filepath).resolve() - - @BaseIngestDoc.skip_if_file_exists - @requires_dependencies(["office365"]) - def get_file(self): - """Relies on Office365 python sdk message object to do the download.""" - try: - if not self.download_dir.is_dir(): - logger.debug(f"Creating directory: {self.download_dir}") - self.download_dir.mkdir(parents=True, exist_ok=True) - - with open( - os.path.join( - self.download_dir, - self.hash_mail_name(self.file.id) + ".eml", - ), - "wb", - ) as local_file: - self.file.download( - local_file, - ).execute_query() # download MIME representation of a message - - except Exception as e: - logger.error( - f"Error while downloading and saving file: {self.file.subject}.", - ) - logger.error(e) - return - logger.info(f"File downloaded: {self.file.subject}") - return - - -class OutlookConnector(ConnectorCleanupMixin, BaseConnector): - config: SimpleOutlookConfig - - def __init__( - self, - standard_config: StandardConnectorConfig, - config: SimpleOutlookConfig, - ): - super().__init__(standard_config, config) - self._set_client() - self.get_folder_ids() - - @requires_dependencies(["office365"]) - def _set_client(self): - from office365.graph_client import GraphClient - - self.client = GraphClient(self.config.token_factory) - - def initialize(self): - pass - - def recurse_folders(self, folder_id, main_folder_dict): - """We only get a count of subfolders for any folder. - Have to make additional calls to get subfolder ids.""" - subfolders = ( - self.client.users[self.config.user_email] - .mail_folders[folder_id] - .child_folders.get() - .execute_query() - ) - for subfolder in subfolders: - for k, v in main_folder_dict.items(): - if subfolder.get_property("parentFolderId") in v: - v.append(subfolder.id) - if subfolder.get_property("childFolderCount") > 0: - self.recurse_folders(subfolder.id, main_folder_dict) - - def get_folder_ids(self): - """Sets the mail folder ids and subfolder ids for requested root mail folders.""" - self.root_folders = defaultdict(list) - root_folders_with_subfolders = [] - get_root_folders = ( - self.client.users[self.config.user_email].mail_folders.get().execute_query() - ) - - for folder in get_root_folders: - self.root_folders[folder.display_name].append(folder.id) - if folder.get_property("childFolderCount") > 0: - root_folders_with_subfolders.append(folder.id) - - for folder in root_folders_with_subfolders: - self.recurse_folders(folder, self.root_folders) - - # Narrow down all mail folder ids (plus all subfolders) to the ones that were requested. 
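The recurse_folders()/get_folder_ids() pair above exists because the Graph API only reports a childFolderCount on each mail folder, so every level of nesting costs one extra child_folders request. A rough iterative equivalent of that traversal, assuming client and user_email are set up the same way as in OutlookConnector._set_client() (the function name and structure here are illustrative, not part of the original file):

    def collect_folder_ids(client, user_email: str, root_folder_id: str) -> list:
        """Collect a folder id plus all of its descendant folder ids."""
        folder_ids = [root_folder_id]
        stack = [root_folder_id]
        while stack:
            current = stack.pop()
            subfolders = (
                client.users[user_email]
                .mail_folders[current]
                .child_folders.get()
                .execute_query()
            )
            for subfolder in subfolders:
                folder_ids.append(subfolder.id)
                # a non-zero childFolderCount means another round trip is needed
                if subfolder.get_property("childFolderCount") > 0:
                    stack.append(subfolder.id)
        return folder_ids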
- self.selected_folder_ids = list( - chain.from_iterable( - [ - v - for k, v in self.root_folders.items() - if k.lower() in [x.lower() for x in self.config.ms_outlook_folders] - ], - ), - ) - if not self.selected_folder_ids: - raise MissingFolderError( - f"There are no root folders with the names: {self.config.ms_outlook_folders}", - ) - - def get_ingest_docs(self): - """Returns a list of all the message objects that are in the requested root folder(s).""" - filtered_messages = [] - - # Get all the relevant messages in the selected folders/subfolders. - for folder_id in self.selected_folder_ids: - messages = ( - self.client.users[self.config.user_email] - .mail_folders[folder_id] - .messages.get() - .top(MAX_NUM_EMAILS) # Prevents the return from paging - .execute_query() - ) - # Skip empty list if there are no messages in folder. - if messages: - filtered_messages.append(messages) - - # Filtered messages have an un-downloadable resource path. - # So we get each message object individually. - individual_messages = [] - for m in list(chain.from_iterable(filtered_messages)): - messages = ( - self.client.users[self.config.user_email].messages[m.id].get().execute_query() - ) - individual_messages.append(messages) - - return [OutlookIngestDoc(self.standard_config, self.config, f) for f in individual_messages] diff --git a/src/unstructured/ingest/connector/reddit.py b/src/unstructured/ingest/connector/reddit.py deleted file mode 100644 index 2561aeb..0000000 --- a/src/unstructured/ingest/connector/reddit.py +++ /dev/null @@ -1,85 +0,0 @@ -import os -from dataclasses import dataclass, field -from pathlib import Path -from typing import TYPE_CHECKING, Optional - -from unstructured.ingest.interfaces import ( - BaseConnector, - BaseConnectorConfig, - BaseIngestDoc, - ConnectorCleanupMixin, - IngestDocCleanupMixin, - StandardConnectorConfig, -) -from unstructured.ingest.logger import logger -from unstructured.utils import requires_dependencies - -if TYPE_CHECKING: - from praw.models import Submission - - -@dataclass -class SimpleRedditConfig(BaseConnectorConfig): - subreddit_name: str - client_id: Optional[str] - client_secret: Optional[str] - user_agent: str - search_query: Optional[str] - num_posts: int - - def __post_init__(self): - if self.num_posts <= 0: - raise ValueError("The number of Reddit posts to fetch must be positive.") - - -@dataclass -class RedditIngestDoc(IngestDocCleanupMixin, BaseIngestDoc): - config: SimpleRedditConfig = field(repr=False) - post: "Submission" - - @property - def filename(self) -> Path: - return (Path(self.standard_config.download_dir) / f"{self.post.id}.md").resolve() - - @property - def _output_filename(self): - return Path(self.standard_config.output_dir) / f"{self.post.id}.json" - - def _create_full_tmp_dir_path(self): - self.filename.parent.mkdir(parents=True, exist_ok=True) - - @BaseIngestDoc.skip_if_file_exists - def get_file(self): - """Fetches the "remote" doc and stores it locally on the filesystem.""" - self._create_full_tmp_dir_path() - logger.debug(f"Fetching {self} - PID: {os.getpid()}") - # Write the title plus the body, if any - text_to_write = f"# {self.post.title}\n{self.post.selftext}" - with open(self.filename, "w", encoding="utf8") as f: - f.write(text_to_write) - - -@requires_dependencies(["praw"], extras="reddit") -class RedditConnector(ConnectorCleanupMixin, BaseConnector): - config: SimpleRedditConfig - - def __init__(self, standard_config: StandardConnectorConfig, config: SimpleRedditConfig): - from praw import Reddit - - 
super().__init__(standard_config, config) - self.reddit = Reddit( - client_id=config.client_id, - client_secret=config.client_secret, - user_agent=config.user_agent, - ) - - def initialize(self): - pass - - def get_ingest_docs(self): - subreddit = self.reddit.subreddit(self.config.subreddit_name) - if self.config.search_query: - posts = subreddit.search(self.config.search_query, limit=self.config.num_posts) - else: - posts = subreddit.hot(limit=self.config.num_posts) - return [RedditIngestDoc(self.standard_config, self.config, post) for post in posts] diff --git a/src/unstructured/ingest/connector/s3.py b/src/unstructured/ingest/connector/s3.py deleted file mode 100644 index 224de3c..0000000 --- a/src/unstructured/ingest/connector/s3.py +++ /dev/null @@ -1,33 +0,0 @@ -from dataclasses import dataclass -from typing import Type - -from unstructured.ingest.connector.fsspec import ( - FsspecConnector, - FsspecIngestDoc, - SimpleFsspecConfig, -) -from unstructured.ingest.interfaces import StandardConnectorConfig -from unstructured.utils import requires_dependencies - - -@dataclass -class SimpleS3Config(SimpleFsspecConfig): - pass - - -class S3IngestDoc(FsspecIngestDoc): - @requires_dependencies(["s3fs", "fsspec"], extras="s3") - def get_file(self): - super().get_file() - - -@requires_dependencies(["s3fs", "fsspec"], extras="s3") -class S3Connector(FsspecConnector): - ingest_doc_cls: Type[S3IngestDoc] = S3IngestDoc - - def __init__( - self, - config: SimpleS3Config, - standard_config: StandardConnectorConfig, - ) -> None: - super().__init__(standard_config, config) diff --git a/src/unstructured/ingest/connector/sharepoint.py b/src/unstructured/ingest/connector/sharepoint.py deleted file mode 100644 index 840ccf8..0000000 --- a/src/unstructured/ingest/connector/sharepoint.py +++ /dev/null @@ -1,328 +0,0 @@ -from dataclasses import dataclass, field -from html import unescape -from pathlib import Path -from typing import TYPE_CHECKING, Any, Dict, List, Optional -from urllib.parse import urlparse - -from unstructured.file_utils.filetype import EXT_TO_FILETYPE -from unstructured.ingest.interfaces import ( - BaseConnector, - BaseConnectorConfig, - BaseIngestDoc, - ConnectorCleanupMixin, - IngestDocCleanupMixin, - StandardConnectorConfig, -) -from unstructured.ingest.logger import logger -from unstructured.utils import requires_dependencies - -if TYPE_CHECKING: - from office365.sharepoint.files.file import File - -MAX_MB_SIZE = 512_000_000 - - -@dataclass -class SimpleSharepointConfig(BaseConnectorConfig): - client_id: str - client_credential: str = field(repr=False) - site_url: str - path: str - process_pages: bool = False - recursive: bool = False - - def __post_init__(self): - if not (self.client_id and self.client_credential and self.site_url): - raise ValueError( - "Please provide one of the following mandatory values:" - "\n--client-id\n--client-cred\n--site", - ) - - -@dataclass -class SharepointIngestDoc(IngestDocCleanupMixin, BaseIngestDoc): - config: SimpleSharepointConfig - file: "File" - meta: dict - - def __post_init__(self): - self.ext = "".join(Path(self.file.name).suffixes) if not self.meta else ".html" - self.ext = self.ext if self.ext != ".aspx" else ".html" - - if not self.ext: - raise ValueError("Unsupported file without extension.") - - if self.ext not in EXT_TO_FILETYPE: - raise ValueError( - f"Extension {self.ext} not supported. 
" - f"Value MUST be one of {', '.join([k for k in EXT_TO_FILETYPE if k is not None])}.", - ) - self._set_download_paths() - - def _set_download_paths(self) -> None: - """Parses the folder structure from the source and creates the download and output paths""" - download_path = Path(f"{self.standard_config.download_dir}") - output_path = Path(f"{self.standard_config.output_dir}") - if self.meta: - page_url = self.meta["page"].get_property("Url", "") - parent = ( - Path(page_url).with_suffix(self.ext) - if (self.meta["site_path"] is None) - else Path(self.meta["site_path"] + "/" + page_url).with_suffix(self.ext) - ) - else: - parent = Path(self.file.serverRelativeUrl[1:]) - self.download_dir = (download_path / parent.parent).resolve() - self.download_filepath = (download_path / parent).resolve() - oname = f"{str(parent)[:-len(self.ext)]}.json" - self.output_dir = (output_path / parent.parent).resolve() - self.output_filepath = (output_path / oname).resolve() - - @property - def filename(self): - return Path(self.download_filepath).resolve() - - @property - def _output_filename(self): - return Path(self.output_filepath).resolve() - - @property - def date_created(self) -> Optional[str]: - if self.meta: - return self.meta["page"].properties.get("FirstPublished", None) - return self.file.time_created - - @property - def date_modified(self) -> Optional[str]: - if self.meta: - return self.meta["page"].properties.get("Modified", None) - return self.file.time_last_modified - - @property - def exists(self) -> Optional[bool]: - if self.meta: - return self.meta["page"].properties.get("FileName", None) and self.meta[ - "page" - ].properties.get("UniqueId", None) - return self.file.exists - - @property - def record_locator(self) -> Optional[Dict[str, Any]]: - if self.meta: - record_source = self.meta["page"] - property_name = "AbsoluteUrl" - resource_url_name = "absolute_url" - else: - record_source = self.file - property_name = "ServerRelativeUrl" - resource_url_name = "server_relative_url" - - return { - "site": self.config.site_url, - "unique_id": record_source.get_property("UniqueId", ""), - resource_url_name: record_source.get_property(property_name, ""), - } - - @property - def version(self) -> Optional[str]: - if self.meta: - return self.meta["page"].properties.get("Version", "") - - if (n_versions := len(self.file.versions)) > 0: - return self.file.versions[n_versions - 1].properties.get("id", None) - return None - - def _get_page(self): - """Retrieves HTML content of the Sharepoint site through the CanvasContent1 and - LayoutWebpartsContent1""" - - try: - content_labels = ["CanvasContent1", "LayoutWebpartsContent1"] - content = self.file.listItemAllFields.select(content_labels).get().execute_query() - pld = (content.properties.get("LayoutWebpartsContent1", "") or "") + ( - content.properties.get("CanvasContent1", "") or "" - ) - if pld != "": - pld = unescape(pld) - else: - logger.info( - f"Page {self.meta['page'].get_property('Url', '')} has no retrievable content. \ - Dumping empty doc.", - ) - pld = "
" - - self.output_dir.mkdir(parents=True, exist_ok=True) - if not self.download_dir.is_dir(): - logger.debug(f"Creating directory: {self.download_dir}") - self.download_dir.mkdir(parents=True, exist_ok=True) - with self.filename.open(mode="w") as f: - f.write(pld) - except Exception as e: - logger.error(f"Error while downloading and saving file: {self.filename}.") - logger.error(e) - return - logger.info(f"File downloaded: {self.filename}") - - def _get_file(self): - try: - fsize = self.file.length - self.output_dir.mkdir(parents=True, exist_ok=True) - - if not self.download_dir.is_dir(): - logger.debug(f"Creating directory: {self.download_dir}") - self.download_dir.mkdir(parents=True, exist_ok=True) - - if fsize > MAX_MB_SIZE: - logger.info(f"Downloading file with size: {fsize} bytes in chunks") - with self.filename.open(mode="wb") as f: - self.file.download_session(f, chunk_size=1024 * 1024 * 100).execute_query() - else: - with self.filename.open(mode="wb") as f: - self.file.download(f).execute_query() - except Exception as e: - logger.error(f"Error while downloading and saving file: {self.filename}.") - logger.error(e) - return - logger.info(f"File downloaded: {self.filename}") - - @BaseIngestDoc.skip_if_file_exists - @requires_dependencies(["office365"]) - def get_file(self): - if not self.meta: - self._get_file() - else: - self._get_page() - return - - -class SharepointConnector(ConnectorCleanupMixin, BaseConnector): - config: SimpleSharepointConfig - tenant: None - - def __init__(self, standard_config: StandardConnectorConfig, config: SimpleSharepointConfig): - super().__init__(standard_config, config) - self._setup_client() - - @requires_dependencies(["office365"]) - def _setup_client(self): - from office365.runtime.auth.client_credential import ClientCredential - from office365.sharepoint.client_context import ClientContext - - parsed_url = urlparse(self.config.site_url) - site_hostname = (parsed_url.hostname or "").split(".") - tenant_url = site_hostname[0].split("-") - self.process_all = False - self.base_site_url = "" - if tenant_url[-1] == "admin" and (parsed_url.path is None or parsed_url.path == "/"): - self.process_all = True - self.base_site_url = parsed_url._replace( - netloc=parsed_url.netloc.replace(site_hostname[0], tenant_url[0]), - ).geturl() - elif tenant_url[-1] == "admin": - raise ValueError( - "A site url in the form of https://[tenant]-admin.sharepoint.com \ - is required to process all sites within a tenant. 
", - ) - - self.client = ClientContext(self.config.site_url).with_credentials( - ClientCredential(self.config.client_id, self.config.client_credential), - ) - - @requires_dependencies(["office365"]) - def _list_files(self, folder, recursive) -> List["File"]: - from office365.runtime.client_request_exception import ClientRequestException - - try: - objects = folder.expand(["Files", "Folders"]).get().execute_query() - files = list(objects.files) - if not recursive: - return files - for f in objects.folders: - if "/Forms" in f.serverRelativeUrl: - continue - files += self._list_files(f, recursive) - return files - except ClientRequestException as e: - if e.response.status_code != 404: - logger.info("Caught an error while processing documents %s", e.response.text) - return [] - - @requires_dependencies(["office365"]) - def _list_pages(self, site_client) -> list: - from office365.runtime.client_request_exception import ClientRequestException - - try: - pages = site_client.site_pages.pages.get().execute_query() - page_files = [] - - for page_meta in pages: - page_url = page_meta.get_property("Url", None) - if page_url is None: - logger.info("Missing site_url. Omitting page... ") - break - page_url = f"/{page_url}" if page_url[0] != "/" else page_url - file_page = site_client.web.get_file_by_server_relative_path(page_url) - site_path = None - if (url_path := (urlparse(site_client.base_url).path)) and (url_path != "/"): - site_path = url_path[1:] - page_files.append( - [file_page, {"page": page_meta, "site_path": site_path}], - ) - except ClientRequestException as e: - logger.info("Caught an error while processing pages %s", e.response.text) - return [] - - return page_files - - def initialize(self): - pass - - def _ingest_site_docs(self, site_client) -> List["SharepointIngestDoc"]: - root_folder = site_client.web.get_folder_by_server_relative_path(self.config.path) - files = self._list_files(root_folder, self.config.recursive) - if not files: - logger.info( - f"Couldn't process files in path {self.config.path} \ - for site {site_client.base_url}", - ) - output = [SharepointIngestDoc(self.standard_config, self.config, f, {}) for f in files] - if self.config.process_pages: - page_files = self._list_pages(site_client) - if not page_files: - logger.info(f"Couldn't process pages for site {site_client.base_url}") - page_output = [ - SharepointIngestDoc(self.standard_config, self.config, f[0], f[1]) - for f in page_files - ] - output = output + page_output - return output - - def _filter_site_url(self, site): - if site.url is None: - return False - return (site.url[0 : len(self.base_site_url)] == self.base_site_url) and ( # noqa: E203 - "/sites/" in site.url - ) - - @requires_dependencies(["office365"]) - def get_ingest_docs(self): - if self.process_all: - logger.debug(self.base_site_url) - from office365.runtime.auth.client_credential import ClientCredential - from office365.sharepoint.client_context import ClientContext - from office365.sharepoint.tenant.administration.tenant import Tenant - - tenant = Tenant(self.client) - tenant_sites = tenant.get_site_properties_from_sharepoint_by_filters().execute_query() - tenant_sites = [s.url for s in tenant_sites if self._filter_site_url(s)] - tenant_sites.append(self.base_site_url) - ingest_docs: List[SharepointIngestDoc] = [] - for site_url in set(tenant_sites): - logger.info(f"Processing docs for site: {site_url}") - site_client = ClientContext(site_url).with_credentials( - ClientCredential(self.config.client_id, self.config.client_credential), - ) - 
ingest_docs = ingest_docs + self._ingest_site_docs(site_client) - return ingest_docs - else: - return self._ingest_site_docs(self.client) diff --git a/src/unstructured/ingest/connector/slack.py b/src/unstructured/ingest/connector/slack.py deleted file mode 100644 index da933cc..0000000 --- a/src/unstructured/ingest/connector/slack.py +++ /dev/null @@ -1,172 +0,0 @@ -import os -from dataclasses import dataclass -from datetime import datetime -from pathlib import Path -from typing import List, Optional - -from unstructured.ingest.interfaces import ( - BaseConnector, - BaseConnectorConfig, - BaseIngestDoc, - ConnectorCleanupMixin, - IngestDocCleanupMixin, - StandardConnectorConfig, -) -from unstructured.ingest.logger import logger -from unstructured.utils import ( - requires_dependencies, - validate_date_args, -) - -DATE_FORMATS = ("%Y-%m-%d", "%Y-%m-%dT%H:%M:%S", "%Y-%m-%dT%H:%M:%S%z") - - -@dataclass -class SimpleSlackConfig(BaseConnectorConfig): - """Connector config to process all messages by channel id's.""" - - channels: List[str] - token: str - oldest: Optional[str] - latest: Optional[str] - verbose: bool = False - - def validate_inputs(self): - oldest_valid = True - latest_valid = True - - if self.oldest: - oldest_valid = validate_date_args(self.oldest) - - if self.latest: - latest_valid = validate_date_args(self.latest) - - return oldest_valid, latest_valid - - def __post_init__(self): - oldest_valid, latest_valid = self.validate_inputs() - if not oldest_valid and not latest_valid: - raise ValueError( - "Start and/or End dates are not valid. ", - ) - - @staticmethod - def parse_channels(channel_str: str) -> List[str]: - """Parses a comma separated list of channels into a list.""" - return [x.strip() for x in channel_str.split(",")] - - -@dataclass -class SlackIngestDoc(IngestDocCleanupMixin, BaseIngestDoc): - """Class encapsulating fetching a doc and writing processed results (but not - doing the processing!). - - Also includes a cleanup method. When things go wrong and the cleanup - method is not called, the file is left behind on the filesystem to assist debugging. 
- """ - - config: SimpleSlackConfig - channel: str - token: str - oldest: Optional[str] - latest: Optional[str] - - # NOTE(crag): probably doesn't matter, but intentionally not defining tmp_download_file - # __post_init__ for multiprocessing simplicity (no Path objects in initially - # instantiated object) - def _tmp_download_file(self): - channel_file = self.channel + ".txt" - return Path(self.standard_config.download_dir) / channel_file - - @property - def _output_filename(self): - output_file = self.channel + ".json" - return Path(self.standard_config.output_dir) / output_file - - def _create_full_tmp_dir_path(self): - self._tmp_download_file().parent.mkdir(parents=True, exist_ok=True) - - @BaseIngestDoc.skip_if_file_exists - @requires_dependencies(dependencies=["slack_sdk"], extras="slack") - def get_file(self): - from slack_sdk import WebClient - from slack_sdk.errors import SlackApiError - - """Fetches the data from a slack channel and stores it locally.""" - - self._create_full_tmp_dir_path() - - if self.config.verbose: - logger.debug(f"fetching channel {self.channel} - PID: {os.getpid()}") - - messages = [] - self.client = WebClient(token=self.token) - - try: - oldest = "0" - latest = "0" - if self.oldest: - oldest = self.convert_datetime(self.oldest) - - if self.latest: - latest = self.convert_datetime(self.latest) - - result = self.client.conversations_history( - channel=self.channel, - oldest=oldest, - latest=latest, - ) - messages.extend(result["messages"]) - while result["has_more"]: - result = self.client.conversations_history( - channel=self.channel, - oldest=oldest, - latest=latest, - cursor=result["response_metadata"]["next_cursor"], - ) - messages.extend(result["messages"]) - except SlackApiError as e: - logger.error(f"Error: {e}") - - with open(self._tmp_download_file(), "w") as channel_file: - for message in messages: - channel_file.write(message["text"] + "\n") - - def convert_datetime(self, date_time): - for format in DATE_FORMATS: - try: - return datetime.strptime(date_time, format).timestamp() - except ValueError: - pass - - @property - def filename(self): - """The filename of the file created from a slack channel""" - return self._tmp_download_file() - - -@requires_dependencies(dependencies=["slack_sdk"], extras="slack") -class SlackConnector(ConnectorCleanupMixin, BaseConnector): - """Objects of this class support fetching document(s) from""" - - config: SimpleSlackConfig - - def __init__(self, standard_config: StandardConnectorConfig, config: SimpleSlackConfig): - super().__init__(standard_config, config) - - def initialize(self): - """Verify that can get metadata for an object, validates connections info.""" - pass - - def get_ingest_docs(self): - return [ - SlackIngestDoc( - self.standard_config, - self.config, - channel, - self.config.token, - self.config.oldest, - self.config.latest, - ) - for channel in self.config.channels - ] diff --git a/src/unstructured/ingest/connector/wikipedia.py b/src/unstructured/ingest/connector/wikipedia.py deleted file mode 100644 index 90b97dc..0000000 --- a/src/unstructured/ingest/connector/wikipedia.py +++ /dev/null @@ -1,135 +0,0 @@ -import os -from dataclasses import dataclass, field -from pathlib import Path -from typing import TYPE_CHECKING - -from unstructured.ingest.interfaces import ( - BaseConnector, - BaseConnectorConfig, - BaseIngestDoc, - ConnectorCleanupMixin, - IngestDocCleanupMixin, - StandardConnectorConfig, -) -from unstructured.ingest.logger import logger - -if TYPE_CHECKING: - from wikipedia import 
WikipediaPage - - -@dataclass -class SimpleWikipediaConfig(BaseConnectorConfig): - title: str - auto_suggest: bool - - -@dataclass -class WikipediaIngestDoc(IngestDocCleanupMixin, BaseIngestDoc): - config: SimpleWikipediaConfig = field(repr=False) - page: "WikipediaPage" - - @property - def filename(self) -> Path: - raise NotImplementedError() - - @property - def text(self) -> str: - raise NotImplementedError() - - @property - def _output_filename(self): - raise NotImplementedError() - - def _create_full_tmp_dir_path(self): - self.filename.parent.mkdir(parents=True, exist_ok=True) - - @BaseIngestDoc.skip_if_file_exists - def get_file(self): - """Fetches the "remote" doc and stores it locally on the filesystem.""" - self._create_full_tmp_dir_path() - logger.debug(f"Fetching {self} - PID: {os.getpid()}") - with open(self.filename, "w", encoding="utf8") as f: - f.write(self.text) - - -class WikipediaIngestHTMLDoc(WikipediaIngestDoc): - @property - def filename(self) -> Path: - return ( - Path(self.standard_config.download_dir) - / f"{self.page.title}-{self.page.revision_id}.html" - ).resolve() - - @property - def text(self): - return self.page.html() - - @property - def _output_filename(self): - return ( - Path(self.standard_config.output_dir) - / f"{self.page.title}-{self.page.revision_id}-html.json" - ) - - -class WikipediaIngestTextDoc(WikipediaIngestDoc): - @property - def filename(self) -> Path: - return ( - Path(self.standard_config.download_dir) - / f"{self.page.title}-{self.page.revision_id}.txt" - ).resolve() - - @property - def text(self): - return self.page.content - - @property - def _output_filename(self): - return ( - Path(self.standard_config.output_dir) - / f"{self.page.title}-{self.page.revision_id}-txt.json" - ) - - -class WikipediaIngestSummaryDoc(WikipediaIngestDoc): - @property - def filename(self) -> Path: - return ( - Path(self.standard_config.download_dir) - / f"{self.page.title}-{self.page.revision_id}-summary.txt" - ).resolve() - - @property - def text(self): - return self.page.summary - - @property - def _output_filename(self): - return ( - Path(self.standard_config.output_dir) - / f"{self.page.title}-{self.page.revision_id}-summary.json" - ) - - -class WikipediaConnector(ConnectorCleanupMixin, BaseConnector): - config: SimpleWikipediaConfig - - def __init__(self, config: SimpleWikipediaConfig, standard_config: StandardConnectorConfig): - super().__init__(standard_config, config) - - def initialize(self): - pass - - def get_ingest_docs(self): - import wikipedia - - page = wikipedia.page( - self.config.title, - auto_suggest=self.config.auto_suggest, - ) - return [ - WikipediaIngestTextDoc(self.standard_config, self.config, page), - WikipediaIngestHTMLDoc(self.standard_config, self.config, page), - WikipediaIngestSummaryDoc(self.standard_config, self.config, page), - ] diff --git a/src/unstructured/ingest/doc_processor/__init__.py b/src/unstructured/ingest/doc_processor/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/unstructured/ingest/doc_processor/generalized.py b/src/unstructured/ingest/doc_processor/generalized.py deleted file mode 100644 index 243d465..0000000 --- a/src/unstructured/ingest/doc_processor/generalized.py +++ /dev/null @@ -1,65 +0,0 @@ -"""Process arbitrary files with the Unstructured library""" - -import os -from typing import Any, Dict, List, Optional - -from unstructured_inference.models.base import get_model - -from unstructured.ingest.interfaces import BaseIngestDoc as IngestDoc -from unstructured.ingest.interfaces 
import ( - BaseSessionHandle, - IngestDocSessionHandleMixin, -) -from unstructured.ingest.logger import logger - -# module-level variable to store session handle -session_handle: Optional[BaseSessionHandle] = None - - -def initialize(): - """Download default model or model specified by UNSTRUCTURED_HI_RES_MODEL_NAME environment - variable (avoids subprocesses all doing the same)""" - - # If more than one model will be supported and left up to user selection - supported_model = os.environ.get("UNSTRUCTURED_HI_RES_SUPPORTED_MODEL", "") - if supported_model: - for model_name in supported_model.split(","): - get_model(model_name=model_name) - - get_model(os.environ.get("UNSTRUCTURED_HI_RES_MODEL_NAME")) - - -def process_document(doc: "IngestDoc", **partition_kwargs) -> Optional[List[Dict[str, Any]]]: - """Process any IngestDoc-like class of document with chosen Unstructured's partition logic. - - Parameters - ---------- - partition_kwargs - ultimately the parameters passed to partition() - """ - global session_handle - isd_elems_no_filename = None - try: - if isinstance(doc, IngestDocSessionHandleMixin): - if session_handle is None: - # create via doc.session_handle, which is a property that creates a - # session handle if one is not already defined - session_handle = doc.session_handle - else: - doc.session_handle = session_handle - # does the work necessary to load file into filesystem - # in the future, get_file_handle() could also be supported - doc.get_file() - - isd_elems_no_filename = doc.process_file(**partition_kwargs) - - # Note, this may be a no-op if the IngestDoc doesn't do anything to persist - # the results. Instead, the Processor (caller) may work with the aggregate - # results across all docs in memory. - doc.write_result() - except Exception: - # TODO(crag) save the exception instead of print? - logger.error(f"Failed to process {doc}", exc_info=True) - finally: - doc.cleanup_file() - return isd_elems_no_filename diff --git a/src/unstructured/ingest/interfaces.py b/src/unstructured/ingest/interfaces.py deleted file mode 100644 index cc686ab..0000000 --- a/src/unstructured/ingest/interfaces.py +++ /dev/null @@ -1,361 +0,0 @@ -"""Defines Abstract Base Classes (ABC's) core to batch processing documents -through Unstructured.""" - -import functools -import json -import os -from abc import ABC, abstractmethod -from dataclasses import dataclass -from datetime import datetime -from pathlib import Path -from typing import Any, Dict, List, Optional - -import requests - -from unstructured.documents.elements import DataSourceMetadata -from unstructured.ingest.logger import logger -from unstructured.partition.auto import partition -from unstructured.staging.base import convert_to_dict - - -@dataclass -class BaseSessionHandle(ABC): - """Abstract Base Class for sharing resources that are local to an individual process. 
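The module-level session_handle above is what keeps each worker subprocess down to a single shared resource: the first document processed creates the handle through its session_handle property, and every later document has that same handle assigned before fetching. A stripped-down sketch of the flow in process_document() (the empty docs list is a stand-in for whatever IngestDocSessionHandleMixin instances a connector yields):

    docs: list = []  # IngestDocSessionHandleMixin instances from a connector
    session_handle = None

    for doc in docs:
        if session_handle is None:
            # the property lazily calls config.create_session_handle()
            session_handle = doc.session_handle
        else:
            # reuse the existing handle instead of creating another one
            doc.session_handle = session_handle
        doc.get_file()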
- e.g., a connection for making a request for fetching documents.""" - - -@dataclass -class ProcessorConfigs: - """Common set of config required when running data connectors.""" - - partition_strategy: str - partition_ocr_languages: str - partition_pdf_infer_table_structure: bool - partition_encoding: str - num_processes: int - reprocess: bool - max_docs: int - - -@dataclass -class StandardConnectorConfig: - """Common set of config options passed to all connectors.""" - - # where raw documents are stored for processing, and then removed if not preserve_downloads - download_dir: str - # where to write structured data outputs - output_dir: str - download_only: bool = False - fields_include: str = "element_id,text,type,metadata" - flatten_metadata: bool = False - metadata_exclude: Optional[str] = None - metadata_include: Optional[str] = None - partition_by_api: bool = False - partition_endpoint: str = "https://api.unstructured.io/general/v0/general" - api_key: str = "" - preserve_downloads: bool = False - re_download: bool = False - - -class BaseConnectorConfig(ABC): - """Abstract definition on which to define connector-specific attributes.""" - - -@dataclass -class BaseConnector(ABC): - """Abstract Base Class for a connector to a remote source, e.g. S3 or Google Drive.""" - - standard_config: StandardConnectorConfig - config: BaseConnectorConfig - - def __init__(self, standard_config: StandardConnectorConfig, config: BaseConnectorConfig): - """Expects a standard_config object that implements StandardConnectorConfig - and a config object that implements BaseConnectorConfig.""" - self.standard_config = standard_config - self.config = config - - @abstractmethod - def cleanup(self, cur_dir=None): - """Any additional cleanup needed after processing is complete. E.g., removing - temporary download dirs that are empty. - - By convention, documents that failed to process are typically not cleaned up.""" - pass - - @abstractmethod - def initialize(self): - """Initializes the connector. Should also validate the connector is properly - configured: e.g., list a single document from the source.""" - pass - - @abstractmethod - def get_ingest_docs(self): - """Returns all ingest docs (derived from BaseIngestDoc). - This does not imply downloading all the raw documents themselves, - rather each IngestDoc is capable of fetching its content (in another process) - with IngestDoc.get_file().""" - pass - - -@dataclass -class BaseIngestDoc(ABC): - """An "ingest document" is specific to a connector, and provides - methods to fetch a single raw document, store it locally for processing, any cleanup - needed after successful processing of the doc, and the ability to write the doc's - structured outputs once processed. - - Crucially, it is not responsible for the actual processing of the raw document. - """ - - standard_config: StandardConnectorConfig - config: BaseConnectorConfig - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self._date_processed = None - - @property - def date_created(self) -> Optional[str]: - """The date the document was created on the source system.""" - return None - - @property - def date_modified(self) -> Optional[str]: - """The date the document was last modified on the source system.""" - return None - - @property - def date_processed(self) -> Optional[str]: - """The date the document was last processed by Unstructured. 
- self._date_processed is assigned internally in self.partition_file()""" - return self._date_processed - - @property - def exists(self) -> Optional[bool]: - """Whether the document exists on the remote source.""" - return None - - @property - @abstractmethod - def filename(self): - """The local filename of the document after fetching from remote source.""" - - @property - @abstractmethod - def _output_filename(self): - """Filename of the structured output for this doc.""" - - @property - def record_locator(self) -> Optional[Dict[str, Any]]: # Values must be JSON-serializable - """A dictionary with any data necessary to uniquely identify the document on - the source system.""" - return None - - @property - def source_url(self) -> Optional[str]: - """The url of the source document.""" - return None - - @property - def version(self) -> Optional[str]: - """The version of the source document; this could be the last modified date, an - explicit version number, or anything else that can be used to uniquely identify - the version of the document.""" - return None - - @abstractmethod - def cleanup_file(self): - """Removes the local copy of the file (or anything else) after successful processing.""" - pass - - @staticmethod - def skip_if_file_exists(func): - """Decorator that checks if a file exists, is not empty, and should not re-download; - if so, log a message indicating as much and skip the decorated function.""" - - @functools.wraps(func) - def wrapper(self, *args, **kwargs): - if ( - not self.standard_config.re_download - and self.filename.is_file() - and self.filename.stat().st_size - ): - logger.debug(f"File exists: {self.filename}, skipping {func.__name__}") - return None - return func(self, *args, **kwargs) - - return wrapper - - # NOTE(crag): Future BaseIngestDoc classes could define get_file_object() methods - # in addition to or instead of get_file() - @abstractmethod - def get_file(self): - """Fetches the "remote" doc and stores it locally on the filesystem.""" - pass - - def has_output(self) -> bool: - """Determine if structured output for this doc already exists.""" - return self._output_filename.is_file() and self._output_filename.stat().st_size - - def write_result(self): - """Write the structured json result for this doc. 
result must be json serializable.""" - if self.standard_config.download_only: - return - self._output_filename.parent.mkdir(parents=True, exist_ok=True) - with open(self._output_filename, "w", encoding="utf8") as output_f: - json.dump(self.isd_elems_no_filename, output_f, ensure_ascii=False, indent=2) - logger.info(f"Wrote {self._output_filename}") - - def partition_file(self, **partition_kwargs) -> List[Dict[str, Any]]: - if not self.standard_config.partition_by_api: - logger.debug("Using local partition") - elements = partition( - filename=str(self.filename), - data_source_metadata=DataSourceMetadata( - url=self.source_url, - version=self.version, - record_locator=self.record_locator, - date_created=self.date_created, - date_modified=self.date_modified, - date_processed=self.date_processed, - ), - **partition_kwargs, - ) - return convert_to_dict(elements) - - else: - endpoint = self.standard_config.partition_endpoint - - logger.debug(f"Using remote partition ({endpoint})") - - with open(self.filename, "rb") as f: - headers_dict = {} - if len(self.standard_config.api_key) > 0: - headers_dict["UNSTRUCTURED-API-KEY"] = self.standard_config.api_key - response = requests.post( - f"{endpoint}", - files={"files": (str(self.filename), f)}, - headers=headers_dict, - # TODO: add m_data_source_metadata to unstructured-api pipeline_api and then - # pass the stringified json here - ) - - if response.status_code != 200: - raise RuntimeError(f"Caught {response.status_code} from API: {response.text}") - - return response.json() - - def process_file(self, **partition_kwargs) -> Optional[List[Dict[str, Any]]]: - self._date_processed = datetime.utcnow().isoformat() - if self.standard_config.download_only: - return None - logger.info(f"Processing {self.filename}") - - isd_elems = self.partition_file(**partition_kwargs) - - self.isd_elems_no_filename = [] - for elem in isd_elems: - # type: ignore - if ( - self.standard_config.metadata_exclude is not None - and self.standard_config.metadata_include is not None - ): - raise ValueError( - "Arguments `--metadata-include` and `--metadata-exclude` are " - "mutually exclusive with each other.", - ) - elif self.standard_config.metadata_exclude is not None: - ex_list = self.standard_config.metadata_exclude.split(",") - for ex in ex_list: - if "." 
in ex: # handle nested fields - nested_fields = ex.split(".") - current_elem = elem - for field in nested_fields[:-1]: - if field in current_elem: - current_elem = current_elem[field] - field_to_exclude = nested_fields[-1] - if field_to_exclude in current_elem: - current_elem.pop(field_to_exclude, None) - else: # handle top-level fields - elem["metadata"].pop(ex, None) # type: ignore[attr-defined] - elif self.standard_config.metadata_include is not None: - in_list = self.standard_config.metadata_include.split(",") - for k in list(elem["metadata"].keys()): # type: ignore[attr-defined] - if k not in in_list: - elem["metadata"].pop(k, None) # type: ignore[attr-defined] - - in_list = self.standard_config.fields_include.split(",") - elem = {k: v for k, v in elem.items() if k in in_list} - - if self.standard_config.flatten_metadata: - for k, v in elem["metadata"].items(): # type: ignore[attr-defined] - elem[k] = v - elem.pop("metadata") # type: ignore[attr-defined] - - self.isd_elems_no_filename.append(elem) - - return self.isd_elems_no_filename - - -class ConnectorCleanupMixin: - standard_config: StandardConnectorConfig - - def cleanup(self, cur_dir=None): - """Recursively clean up downloaded files and directories.""" - if self.standard_config.preserve_downloads or self.standard_config.download_only: - return - if cur_dir is None: - cur_dir = self.standard_config.download_dir - if cur_dir is None or not Path(cur_dir).is_dir(): - return - sub_dirs = os.listdir(cur_dir) - os.chdir(cur_dir) - for sub_dir in sub_dirs: - # don't traverse symlinks, not that there ever should be any - if os.path.isdir(sub_dir) and not os.path.islink(sub_dir): - self.cleanup(sub_dir) - os.chdir("..") - if len(os.listdir(cur_dir)) == 0: - os.rmdir(cur_dir) - - -class IngestDocCleanupMixin: - standard_config: StandardConnectorConfig - - @property - @abstractmethod - def filename(self): - """The local filename of the document after fetching from the remote source.""" - - def cleanup_file(self): - """Removes the local copy of the file after successful processing.""" - if ( - not self.standard_config.preserve_downloads - and self.filename.is_file() - and not self.standard_config.download_only - ): - logger.debug(f"Cleaning up {self}") - os.unlink(self.filename) - - -class ConfigSessionHandleMixin: - @abstractmethod - def create_session_handle(self) -> BaseSessionHandle: - """Creates a session handle that will be assigned to each IngestDoc to share - session-related resources across all document handling for a given subprocess.""" - - -class IngestDocSessionHandleMixin: - config: ConfigSessionHandleMixin - _session_handle: Optional[BaseSessionHandle] = None - - @property - def session_handle(self): - """If a session handle is not assigned, creates a new one and assigns it.""" - if self._session_handle is None: - self._session_handle = self.config.create_session_handle() - return self._session_handle - - @session_handle.setter - def session_handle(self, session_handle: BaseSessionHandle): - self._session_handle = session_handle diff --git a/src/unstructured/ingest/logger.py b/src/unstructured/ingest/logger.py deleted file mode 100644 index 752662c..0000000 --- a/src/unstructured/ingest/logger.py +++ /dev/null @@ -1,27 +0,0 @@ -import logging - -logger = logging.getLogger("unstructured.ingest") - - -def ingest_log_streaming_init(level: int) -> None: - handler = logging.StreamHandler() - handler.name = "ingest_log_handler" - formatter = logging.Formatter("%(asctime)s %(processName)-10s %(levelname)-8s %(message)s") -
handler.setFormatter(formatter) - - # Only want to add the handler once - if "ingest_log_handler" not in [h.name for h in logger.handlers]: - logger.addHandler(handler) - - logger.setLevel(level) - - -def make_default_logger(level: int) -> logging.Logger: - """Return a custom logger.""" - logger = logging.getLogger("unstructured.ingest") - handler = logging.StreamHandler() - handler.name = "ingest_log_handler" - formatter = logging.Formatter("%(asctime)s %(processName)-10s %(levelname)-8s %(message)s") - handler.setFormatter(formatter) - logger.addHandler(handler) - logger.setLevel(level) - return logger diff --git a/src/unstructured/ingest/main.py b/src/unstructured/ingest/main.py deleted file mode 100755 index ead616f..0000000 --- a/src/unstructured/ingest/main.py +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env python3 -from unstructured.ingest.cli.cli import get_cmd - - -def main(): - ingest_cmd = get_cmd() - ingest_cmd() - - -if __name__ == "__main__": - main() diff --git a/src/unstructured/ingest/processor.py b/src/unstructured/ingest/processor.py deleted file mode 100644 index dbf75d6..0000000 --- a/src/unstructured/ingest/processor.py +++ /dev/null @@ -1,112 +0,0 @@ -import logging -import multiprocessing as mp -from contextlib import suppress -from functools import partial - -from unstructured.ingest.doc_processor.generalized import initialize, process_document -from unstructured.ingest.interfaces import ( - BaseConnector, - ProcessorConfigs, -) -from unstructured.ingest.logger import ingest_log_streaming_init, logger - -with suppress(RuntimeError): - mp.set_start_method("spawn") - - -class Processor: - def __init__( - self, - doc_connector, - doc_processor_fn, - num_processes, - reprocess, - verbose, - max_docs, - ): - # initialize the reader and writer - self.doc_connector = doc_connector - self.doc_processor_fn = doc_processor_fn - self.num_processes = num_processes - self.reprocess = reprocess - self.verbose = verbose - self.max_docs = max_docs - - def initialize(self): - """Slower initialization things: check connections, load things into memory, etc.""" - ingest_log_streaming_init(logging.DEBUG if self.verbose else logging.INFO) - self.doc_connector.initialize() - initialize() - - def cleanup(self): - self.doc_connector.cleanup() - - def _filter_docs_with_outputs(self, docs): - num_docs_all = len(docs) - docs = [doc for doc in docs if not doc.has_output()] - if self.max_docs is not None: - if num_docs_all > self.max_docs: - num_docs_all = self.max_docs - docs = docs[: self.max_docs] - num_docs_to_process = len(docs) - if num_docs_to_process == 0: - logger.info( - "All docs have structured outputs, nothing to do.
Use --reprocess to process all.", - ) - return None - elif num_docs_to_process != num_docs_all: - logger.info( - f"Skipping processing for {num_docs_all - num_docs_to_process} docs out of " - f"{num_docs_all} since their structured outputs already exist; use --reprocess to " - "reprocess those in addition to the unprocessed ones.", - ) - return docs - - def run(self): - self.initialize() - - # fetch the list of lazily downloading IngestDoc objects - docs = self.doc_connector.get_ingest_docs() - - # remove docs that have already been processed - if not self.reprocess: - docs = self._filter_docs_with_outputs(docs) - if not docs: - return - - # Debugging tip: use the line below and comment out the mp.Pool block - # to remain in a single process - # self.doc_processor_fn(docs[0]) - logger.info(f"Processing {len(docs)} docs") - try: - with mp.Pool( - processes=self.num_processes, - initializer=ingest_log_streaming_init, - initargs=(logging.DEBUG if self.verbose else logging.INFO,), - ) as pool: - pool.map(self.doc_processor_fn, docs) - finally: - self.cleanup() - - -def process_documents( - doc_connector: BaseConnector, - processor_config: ProcessorConfigs, - verbose: bool = False, -) -> None: - process_document_with_partition_args = partial( - process_document, - strategy=processor_config.partition_strategy, - ocr_languages=processor_config.partition_ocr_languages, - encoding=processor_config.partition_encoding, - pdf_infer_table_structure=processor_config.partition_pdf_infer_table_structure, - ) - - Processor( - doc_connector=doc_connector, - doc_processor_fn=process_document_with_partition_args, - num_processes=processor_config.num_processes, - reprocess=processor_config.reprocess, - verbose=verbose, - max_docs=processor_config.max_docs, - ).run() diff --git a/src/unstructured/ingest/runner/__init__.py b/src/unstructured/ingest/runner/__init__.py deleted file mode 100644 index 5bf4285..0000000 --- a/src/unstructured/ingest/runner/__init__.py +++ /dev/null @@ -1,47 +0,0 @@ -from .airtable import airtable -from .azure import azure -from .biomed import biomed -from .box import box -from .confluence import confluence -from .discord import discord -from .dropbox import dropbox -from .elasticsearch import elasticsearch -from .fsspec import fsspec -from .gcs import gcs -from .github import github -from .gitlab import gitlab -from .google_drive import gdrive -from .local import local -from .notion import notion -from .onedrive import onedrive -from .outlook import outlook -from .reddit import reddit -from .s3 import s3 -from .sharepoint import sharepoint -from .slack import slack -from .wikipedia import wikipedia - -__all__ = [ - "airtable", - "azure", - "biomed", - "box", - "confluence", - "discord", - "dropbox", - "elasticsearch", - "fsspec", - "gcs", - "gdrive", - "github", - "gitlab", - "local", - "notion", - "onedrive", - "outlook", - "reddit", - "s3", - "sharepoint", - "slack", - "wikipedia", -]
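# NOTE: a condensed sketch, not part of this diff, of the call pattern every
# runner module below follows in the tree being removed: build a connector from
# its Simple*Config plus the shared StandardConnectorConfig, then hand it to
# process_documents(), which fans the connector's ingest docs out over a
# multiprocessing pool. The local runner is the simplest concrete case; the
# input_path value here is illustrative.
from unstructured.ingest.connector.local import LocalConnector, SimpleLocalConfig
from unstructured.ingest.interfaces import ProcessorConfigs, StandardConnectorConfig
from unstructured.ingest.processor import process_documents


def run_local_example(
    connector_config: StandardConnectorConfig,
    processor_config: ProcessorConfigs,
) -> None:
    doc_connector = LocalConnector(
        standard_config=connector_config,
        config=SimpleLocalConfig(
            input_path="example-docs/",  # illustrative path
            recursive=True,
            file_glob=None,
        ),
    )
    process_documents(doc_connector=doc_connector, processor_config=processor_config)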
diff --git a/src/unstructured/ingest/runner/airtable.py b/src/unstructured/ingest/runner/airtable.py deleted file mode 100644 index f1536f4..0000000 --- a/src/unstructured/ingest/runner/airtable.py +++ /dev/null @@ -1,43 +0,0 @@ -import hashlib -import logging -from typing import Optional - -from unstructured.ingest.interfaces import ProcessorConfigs, StandardConnectorConfig -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.processor import process_documents -from unstructured.ingest.runner.utils import update_download_dir_hash - - -def airtable( - verbose: bool, - connector_config: StandardConnectorConfig, - processor_config: ProcessorConfigs, - personal_access_token: str, - list_of_paths: Optional[str], - **kwargs, -): - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - - hashed_dir_name = hashlib.sha256( - personal_access_token.encode("utf-8"), - ) - connector_config.download_dir = update_download_dir_hash( - connector_config=connector_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - from unstructured.ingest.connector.airtable import ( - AirtableConnector, - SimpleAirtableConfig, - ) - - doc_connector = AirtableConnector( # type: ignore - standard_config=connector_config, - config=SimpleAirtableConfig( - personal_access_token=personal_access_token, - list_of_paths=list_of_paths, - ), - ) - - process_documents(doc_connector=doc_connector, processor_config=processor_config) diff --git a/src/unstructured/ingest/runner/azure.py b/src/unstructured/ingest/runner/azure.py deleted file mode 100644 index 93bb796..0000000 --- a/src/unstructured/ingest/runner/azure.py +++ /dev/null @@ -1,57 +0,0 @@ -import logging -from typing import Optional - -from unstructured.ingest.interfaces import ProcessorConfigs, StandardConnectorConfig -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.processor import process_documents -from unstructured.ingest.runner.utils import update_download_dir_remote_url - - -def azure( - verbose: bool, - connector_config: StandardConnectorConfig, - processor_config: ProcessorConfigs, - account_name: Optional[str], - account_key: Optional[str], - connection_string: Optional[str], - remote_url: str, - recursive: bool, - **kwargs, -): - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - - if not account_name and not connection_string: - raise ValueError( - "missing either account-name or connection-string", - ) - - connector_config.download_dir = update_download_dir_remote_url( - connector_config=connector_config, - remote_url=remote_url, - logger=logger, - ) - - from unstructured.ingest.connector.azure import ( - AzureBlobStorageConnector, - SimpleAzureBlobStorageConfig, - ) - - if account_name: - access_kwargs = { - "account_name": account_name, - "account_key": account_key, - } - elif connection_string: - access_kwargs = {"connection_string": connection_string} - else: - access_kwargs = {} - doc_connector = AzureBlobStorageConnector( # type: ignore - standard_config=connector_config, - config=SimpleAzureBlobStorageConfig( - path=remote_url, - recursive=recursive, - access_kwargs=access_kwargs, - ), - ) - - process_documents(doc_connector=doc_connector, processor_config=processor_config) diff --git a/src/unstructured/ingest/runner/biomed.py b/src/unstructured/ingest/runner/biomed.py deleted file mode 100644 index ca92e92..0000000 --- a/src/unstructured/ingest/runner/biomed.py +++ /dev/null @@ -1,63 +0,0 @@ -import hashlib -import logging -from typing import Optional - -from unstructured.ingest.interfaces import ProcessorConfigs, StandardConnectorConfig -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.processor import process_documents -from unstructured.ingest.runner.utils import update_download_dir_hash - - -def biomed( - verbose: bool, - connector_config: StandardConnectorConfig, - processor_config: ProcessorConfigs, - path: Optional[str], - api_id: Optional[str], - api_from: Optional[str], - api_until: Optional[str], - max_retries: int, - max_request_time: int, - decay: float,
- **kwargs, -): - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - base_path = ( - path - if path - else "{}-{}-{}".format( - api_id if api_id else "", - api_from if api_from else "", - api_until if api_until else "", - ) - ) - - hashed_dir_name = hashlib.sha256( - base_path.encode("utf-8"), - ) - - connector_config.download_dir = update_download_dir_hash( - connector_config=connector_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - from unstructured.ingest.connector.biomed import ( - BiomedConnector, - SimpleBiomedConfig, - ) - - doc_connector = BiomedConnector( # type: ignore - standard_config=connector_config, - config=SimpleBiomedConfig( - path=path, - id_=api_id, - from_=api_from, - until=api_until, - max_retries=max_retries, - request_timeout=max_request_time, - decay=decay, - ), - ) - - process_documents(doc_connector=doc_connector, processor_config=processor_config) diff --git a/src/unstructured/ingest/runner/box.py b/src/unstructured/ingest/runner/box.py deleted file mode 100644 index 0b9961b..0000000 --- a/src/unstructured/ingest/runner/box.py +++ /dev/null @@ -1,38 +0,0 @@ -import logging -from typing import Optional - -from unstructured.ingest.interfaces import ProcessorConfigs, StandardConnectorConfig -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.processor import process_documents -from unstructured.ingest.runner.utils import update_download_dir_remote_url - - -def box( - verbose: bool, - connector_config: StandardConnectorConfig, - processor_config: ProcessorConfigs, - remote_url: str, - recursive: bool, - box_app_config: Optional[str], - **kwargs, -): - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - - connector_config.download_dir = update_download_dir_remote_url( - connector_config=connector_config, - remote_url=remote_url, - logger=logger, - ) - - from unstructured.ingest.connector.box import BoxConnector, SimpleBoxConfig - - doc_connector = BoxConnector( # type: ignore - standard_config=connector_config, - config=SimpleBoxConfig( - path=remote_url, - recursive=recursive, - access_kwargs={"box_app_config": box_app_config}, - ), - ) - - process_documents(doc_connector=doc_connector, processor_config=processor_config) diff --git a/src/unstructured/ingest/runner/confluence.py b/src/unstructured/ingest/runner/confluence.py deleted file mode 100644 index 366ecaf..0000000 --- a/src/unstructured/ingest/runner/confluence.py +++ /dev/null @@ -1,51 +0,0 @@ -import hashlib -import logging -from typing import Optional - -from unstructured.ingest.interfaces import ProcessorConfigs, StandardConnectorConfig -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.processor import process_documents -from unstructured.ingest.runner.utils import update_download_dir_hash - - -def confluence( - verbose: bool, - connector_config: StandardConnectorConfig, - processor_config: ProcessorConfigs, - url: str, - user_email: str, - api_token: str, - list_of_spaces: Optional[str], - max_num_of_spaces: int, - max_num_of_docs_from_each_space: int, - **kwargs, -): - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - - hashed_dir_name = hashlib.sha256( - url.encode("utf-8"), - ) - connector_config.download_dir = update_download_dir_hash( - connector_config=connector_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - from unstructured.ingest.connector.confluence import ( - ConfluenceConnector, - 
SimpleConfluenceConfig, - ) - - doc_connector = ConfluenceConnector( # type: ignore - standard_config=connector_config, - config=SimpleConfluenceConfig( - url=url, - user_email=user_email, - api_token=api_token, - list_of_spaces=list_of_spaces, - max_number_of_spaces=max_num_of_spaces, - max_number_of_docs_from_each_space=max_num_of_docs_from_each_space, - ), - ) - - process_documents(doc_connector=doc_connector, processor_config=processor_config) diff --git a/src/unstructured/ingest/runner/discord.py b/src/unstructured/ingest/runner/discord.py deleted file mode 100644 index f94546a..0000000 --- a/src/unstructured/ingest/runner/discord.py +++ /dev/null @@ -1,46 +0,0 @@ -import hashlib -import logging -from typing import Optional - -from unstructured.ingest.interfaces import ProcessorConfigs, StandardConnectorConfig -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.processor import process_documents -from unstructured.ingest.runner.utils import update_download_dir_hash - - -def discord( - verbose: bool, - connector_config: StandardConnectorConfig, - processor_config: ProcessorConfigs, - channels: str, - token: str, - period: Optional[int], - **kwargs, -): - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - - hashed_dir_name = hashlib.sha256( - channels.encode("utf-8"), - ) - connector_config.download_dir = update_download_dir_hash( - connector_config=connector_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - from unstructured.ingest.connector.discord import ( - DiscordConnector, - SimpleDiscordConfig, - ) - - doc_connector = DiscordConnector( # type: ignore - standard_config=connector_config, - config=SimpleDiscordConfig( - channels=SimpleDiscordConfig.parse_channels(channels), - days=period, - token=token, - verbose=verbose, - ), - ) - - process_documents(doc_connector=doc_connector, processor_config=processor_config) diff --git a/src/unstructured/ingest/runner/dropbox.py b/src/unstructured/ingest/runner/dropbox.py deleted file mode 100644 index 5e7aeee..0000000 --- a/src/unstructured/ingest/runner/dropbox.py +++ /dev/null @@ -1,41 +0,0 @@ -import logging -from typing import Optional - -from unstructured.ingest.interfaces import ProcessorConfigs, StandardConnectorConfig -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.processor import process_documents -from unstructured.ingest.runner.utils import update_download_dir_remote_url - - -def dropbox( - verbose: bool, - connector_config: StandardConnectorConfig, - processor_config: ProcessorConfigs, - remote_url: str, - recursive: bool, - token: Optional[str], - **kwargs, -): - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - - connector_config.download_dir = update_download_dir_remote_url( - connector_config=connector_config, - remote_url=remote_url, - logger=logger, - ) - - from unstructured.ingest.connector.dropbox import ( - DropboxConnector, - SimpleDropboxConfig, - ) - - doc_connector = DropboxConnector( # type: ignore - standard_config=connector_config, - config=SimpleDropboxConfig( - path=remote_url, - recursive=recursive, - access_kwargs={"token": token}, - ), - ) - - process_documents(doc_connector=doc_connector, processor_config=processor_config) diff --git a/src/unstructured/ingest/runner/elasticsearch.py b/src/unstructured/ingest/runner/elasticsearch.py deleted file mode 100644 index f6b066b..0000000 --- a/src/unstructured/ingest/runner/elasticsearch.py +++ /dev/null @@ 
-1,47 +0,0 @@ -import hashlib -import logging -from typing import Optional - -from unstructured.ingest.interfaces import ProcessorConfigs, StandardConnectorConfig -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.processor import process_documents -from unstructured.ingest.runner.utils import update_download_dir_hash - - -def elasticsearch( - verbose: bool, - connector_config: StandardConnectorConfig, - processor_config: ProcessorConfigs, - url: str, - index_name: str, - jq_query: Optional[str], - **kwargs, -): - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - - hashed_dir_name = hashlib.sha256( - f"{url}_{index_name}".encode( - "utf-8", - ), - ) - connector_config.download_dir = update_download_dir_hash( - connector_config=connector_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - from unstructured.ingest.connector.elasticsearch import ( - ElasticsearchConnector, - SimpleElasticsearchConfig, - ) - - doc_connector = ElasticsearchConnector( # type: ignore - standard_config=connector_config, - config=SimpleElasticsearchConfig( - url=url, - index_name=index_name, - jq_query=jq_query, - ), - ) - - process_documents(doc_connector=doc_connector, processor_config=processor_config) diff --git a/src/unstructured/ingest/runner/fsspec.py b/src/unstructured/ingest/runner/fsspec.py deleted file mode 100644 index a82b10b..0000000 --- a/src/unstructured/ingest/runner/fsspec.py +++ /dev/null @@ -1,48 +0,0 @@ -import logging -import warnings -from urllib.parse import urlparse - -from unstructured.ingest.interfaces import ProcessorConfigs, StandardConnectorConfig -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.processor import process_documents -from unstructured.ingest.runner.utils import update_download_dir_remote_url - - -def fsspec( - verbose: bool, - connector_config: StandardConnectorConfig, - processor_config: ProcessorConfigs, - remote_url: str, - recursive: bool, - **kwargs, -): - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - - connector_config.download_dir = update_download_dir_remote_url( - connector_config=connector_config, - remote_url=remote_url, - logger=logger, - ) - - protocol = urlparse(remote_url).scheme - warnings.warn( - f"`fsspec` protocol {protocol} is not directly supported by `unstructured`," - " so use it at your own risk. 
Supported protocols are `gcs`, `gs`, `s3`, `s3a`," - " `dropbox`, `abfs` and `az`.", - UserWarning, - ) - - from unstructured.ingest.connector.fsspec import ( - FsspecConnector, - SimpleFsspecConfig, - ) - - doc_connector = FsspecConnector( # type: ignore - standard_config=connector_config, - config=SimpleFsspecConfig( - path=remote_url, - recursive=recursive, - ), - ) - - process_documents(doc_connector=doc_connector, processor_config=processor_config) diff --git a/src/unstructured/ingest/runner/gcs.py b/src/unstructured/ingest/runner/gcs.py deleted file mode 100644 index d309d23..0000000 --- a/src/unstructured/ingest/runner/gcs.py +++ /dev/null @@ -1,38 +0,0 @@ -import logging -from typing import Optional - -from unstructured.ingest.interfaces import ProcessorConfigs, StandardConnectorConfig -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.processor import process_documents -from unstructured.ingest.runner.utils import update_download_dir_remote_url - - -def gcs( - verbose: bool, - connector_config: StandardConnectorConfig, - processor_config: ProcessorConfigs, - remote_url: str, - recursive: bool, - token: Optional[str], - **kwargs, -): - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - - connector_config.download_dir = update_download_dir_remote_url( - connector_config=connector_config, - remote_url=remote_url, - logger=logger, - ) - - from unstructured.ingest.connector.gcs import GcsConnector, SimpleGcsConfig - - doc_connector = GcsConnector( # type: ignore - standard_config=connector_config, - config=SimpleGcsConfig( - path=remote_url, - recursive=recursive, - access_kwargs={"token": token}, - ), - ) - - process_documents(doc_connector=doc_connector, processor_config=processor_config) diff --git a/src/unstructured/ingest/runner/github.py b/src/unstructured/ingest/runner/github.py deleted file mode 100644 index 46d8bc8..0000000 --- a/src/unstructured/ingest/runner/github.py +++ /dev/null @@ -1,49 +0,0 @@ -import hashlib -import logging -from typing import Optional - -from unstructured.ingest.interfaces import ProcessorConfigs, StandardConnectorConfig -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.processor import process_documents -from unstructured.ingest.runner.utils import update_download_dir_hash - - -def github( - verbose: bool, - connector_config: StandardConnectorConfig, - processor_config: ProcessorConfigs, - url: str, - git_branch: str, - git_access_token: Optional[str], - git_file_glob: Optional[str], - **kwargs, -): - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - - hashed_dir_name = hashlib.sha256( - f"{url}_{git_branch}".encode( - "utf-8", - ), - ) - connector_config.download_dir = update_download_dir_hash( - connector_config=connector_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - from unstructured.ingest.connector.github import ( - GitHubConnector, - SimpleGitHubConfig, - ) - - doc_connector = GitHubConnector( # type: ignore - standard_config=connector_config, - config=SimpleGitHubConfig( - url=url, - access_token=git_access_token, - branch=git_branch, - file_glob=git_file_glob, - ), - ) - - process_documents(doc_connector=doc_connector, processor_config=processor_config) diff --git a/src/unstructured/ingest/runner/gitlab.py b/src/unstructured/ingest/runner/gitlab.py deleted file mode 100644 index a75bb0d..0000000 --- a/src/unstructured/ingest/runner/gitlab.py +++ /dev/null @@ -1,49 +0,0 @@ -import
hashlib -import logging -from typing import Optional - -from unstructured.ingest.interfaces import ProcessorConfigs, StandardConnectorConfig -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.processor import process_documents -from unstructured.ingest.runner.utils import update_download_dir_hash - - -def gitlab( - verbose: bool, - connector_config: StandardConnectorConfig, - processor_config: ProcessorConfigs, - url: str, - git_branch: str, - git_access_token: Optional[str], - git_file_glob: Optional[str], - **kwargs, -): - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - - hashed_dir_name = hashlib.sha256( - f"{url}_{git_branch}".encode( - "utf-8", - ), - ) - connector_config.download_dir = update_download_dir_hash( - connector_config=connector_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - from unstructured.ingest.connector.gitlab import ( - GitLabConnector, - SimpleGitLabConfig, - ) - - doc_connector = GitLabConnector( # type: ignore - standard_config=connector_config, - config=SimpleGitLabConfig( - url=url, - access_token=git_access_token, - branch=git_branch, - file_glob=git_file_glob, - ), - ) - - process_documents(doc_connector=doc_connector, processor_config=processor_config) diff --git a/src/unstructured/ingest/runner/google_drive.py b/src/unstructured/ingest/runner/google_drive.py deleted file mode 100644 index ed1863b..0000000 --- a/src/unstructured/ingest/runner/google_drive.py +++ /dev/null @@ -1,47 +0,0 @@ -import hashlib -import logging -from typing import Optional - -from unstructured.ingest.interfaces import ProcessorConfigs, StandardConnectorConfig -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.processor import process_documents -from unstructured.ingest.runner.utils import update_download_dir_hash - - -def gdrive( - verbose: bool, - connector_config: StandardConnectorConfig, - processor_config: ProcessorConfigs, - service_account_key: str, - recursive: bool, - drive_id: str, - extension: Optional[str], - **kwargs, -): - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - - hashed_dir_name = hashlib.sha256( - drive_id.encode("utf-8"), - ) - connector_config.download_dir = update_download_dir_hash( - connector_config=connector_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - from unstructured.ingest.connector.google_drive import ( - GoogleDriveConnector, - SimpleGoogleDriveConfig, - ) - - doc_connector = GoogleDriveConnector( # type: ignore - standard_config=connector_config, - config=SimpleGoogleDriveConfig( - drive_id=drive_id, - service_account_key=service_account_key, - recursive=recursive, - extension=extension, - ), - ) - - process_documents(doc_connector=doc_connector, processor_config=processor_config) diff --git a/src/unstructured/ingest/runner/local.py b/src/unstructured/ingest/runner/local.py deleted file mode 100644 index 4cde508..0000000 --- a/src/unstructured/ingest/runner/local.py +++ /dev/null @@ -1,34 +0,0 @@ -import logging -from typing import Optional - -from unstructured.ingest.interfaces import ProcessorConfigs, StandardConnectorConfig -from unstructured.ingest.logger import ingest_log_streaming_init -from unstructured.ingest.processor import process_documents - - -def local( - verbose: bool, - connector_config: StandardConnectorConfig, - processor_config: ProcessorConfigs, - input_path: str, - recursive: bool, - file_glob: Optional[str], - **kwargs, -): - 
ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - - from unstructured.ingest.connector.local import ( - LocalConnector, - SimpleLocalConfig, - ) - - doc_connector = LocalConnector( # type: ignore - standard_config=connector_config, - config=SimpleLocalConfig( - input_path=input_path, - recursive=recursive, - file_glob=file_glob, - ), - ) - - process_documents(doc_connector=doc_connector, processor_config=processor_config) diff --git a/src/unstructured/ingest/runner/notion.py b/src/unstructured/ingest/runner/notion.py deleted file mode 100644 index 00f5c03..0000000 --- a/src/unstructured/ingest/runner/notion.py +++ /dev/null @@ -1,62 +0,0 @@ -import hashlib -import logging -from typing import Optional - -from unstructured.ingest.interfaces import ProcessorConfigs, StandardConnectorConfig -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.processor import process_documents -from unstructured.ingest.runner.utils import update_download_dir_hash - - -def notion( - verbose: bool, - connector_config: StandardConnectorConfig, - processor_config: ProcessorConfigs, - api_key: str, - recursive: bool, - page_ids: Optional[str] = "", - database_ids: Optional[str] = "", - **kwargs, -): - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - if not page_ids and not database_ids: - raise ValueError("neither page ids nor database ids provided") - - if page_ids and database_ids: - hashed_dir_name = hashlib.sha256( - f"{page_ids},{database_ids}".encode("utf-8"), - ) - elif page_ids: - hashed_dir_name = hashlib.sha256( - page_ids.encode("utf-8"), - ) - elif database_ids: - hashed_dir_name = hashlib.sha256( - database_ids.encode("utf-8"), - ) - else: - raise ValueError("could not create local cache directory name") - connector_config.download_dir = update_download_dir_hash( - connector_config=connector_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - from unstructured.ingest.connector.notion.connector import ( - NotionConnector, - SimpleNotionConfig, - ) - - doc_connector = NotionConnector( # type: ignore - standard_config=connector_config, - config=SimpleNotionConfig( - page_ids=SimpleNotionConfig.parse_ids(ids_str=page_ids) if page_ids else [], - database_ids=SimpleNotionConfig.parse_ids(ids_str=database_ids) if database_ids else [], - api_key=api_key, - verbose=verbose, - recursive=recursive, - logger=logger, - ), - ) - - process_documents(doc_connector=doc_connector, processor_config=processor_config) diff --git a/src/unstructured/ingest/runner/onedrive.py b/src/unstructured/ingest/runner/onedrive.py deleted file mode 100644 index 4bc6cf9..0000000 --- a/src/unstructured/ingest/runner/onedrive.py +++ /dev/null @@ -1,53 +0,0 @@ -import hashlib -import logging -from typing import Optional - -from unstructured.ingest.interfaces import ProcessorConfigs, StandardConnectorConfig -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.processor import process_documents -from unstructured.ingest.runner.utils import update_download_dir_hash - - -def onedrive( - verbose: bool, - connector_config: StandardConnectorConfig, - processor_config: ProcessorConfigs, - tenant: str, - user_pname: str, - client_id: str, - client_cred: str, - authority_url: Optional[str], - path: Optional[str], - recursive: bool, - **kwargs, -): - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - - hashed_dir_name = hashlib.sha256( - f"{tenant}_{user_pname}".encode("utf-8"), - )
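# NOTE: illustrative aside, not part of this diff. update_download_dir_hash
# (defined in runner/utils.py, deleted further below) turns a digest like the
# one computed above into a per-connection cache directory whenever no
# --download-dir is given: ~/.cache/unstructured/ingest/ plus the first 10 hex
# chars of the digest. A self-contained equivalent, with made-up tenant/user
# values:
import hashlib
from pathlib import Path

hashed = hashlib.sha256("contoso_user@contoso.com".encode("utf-8"))
cache_dir = Path.home() / ".cache" / "unstructured" / "ingest" / hashed.hexdigest()[:10]
print(cache_dir)  # e.g. /home/user/.cache/unstructured/ingest/<10 hex chars>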
- connector_config.download_dir = update_download_dir_hash( - connector_config=connector_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - from unstructured.ingest.connector.onedrive import ( - OneDriveConnector, - SimpleOneDriveConfig, - ) - - doc_connector = OneDriveConnector( # type: ignore - standard_config=connector_config, - config=SimpleOneDriveConfig( - client_id=client_id, - client_credential=client_cred, - user_pname=user_pname, - tenant=tenant, - authority_url=authority_url, - path=path, - recursive=recursive, - ), - ) - - process_documents(doc_connector=doc_connector, processor_config=processor_config) diff --git a/src/unstructured/ingest/runner/outlook.py b/src/unstructured/ingest/runner/outlook.py deleted file mode 100644 index 9afb65a..0000000 --- a/src/unstructured/ingest/runner/outlook.py +++ /dev/null @@ -1,53 +0,0 @@ -import hashlib -import logging -from typing import Optional - -from unstructured.ingest.interfaces import ProcessorConfigs, StandardConnectorConfig -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.processor import process_documents -from unstructured.ingest.runner.utils import update_download_dir_hash - - -def outlook( - verbose: bool, - connector_config: StandardConnectorConfig, - processor_config: ProcessorConfigs, - user_email: str, - client_id: Optional[str], - client_cred: Optional[str], - tenant: Optional[str], - authority_url: Optional[str], - outlook_folders: Optional[str], - recursive: bool, - **kwargs, -): - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - - hashed_dir_name = hashlib.sha256(user_email.encode("utf-8")) - connector_config.download_dir = update_download_dir_hash( - connector_config=connector_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - from unstructured.ingest.connector.outlook import ( - OutlookConnector, - SimpleOutlookConfig, - ) - - doc_connector = OutlookConnector( # type: ignore - standard_config=connector_config, - config=SimpleOutlookConfig( - client_id=client_id, - client_credential=client_cred, - user_email=user_email, - tenant=tenant, - authority_url=authority_url, - ms_outlook_folders=SimpleOutlookConfig.parse_folders(outlook_folders) - if outlook_folders - else [], - recursive=recursive, - ), - ) - - process_documents(doc_connector=doc_connector, processor_config=processor_config) diff --git a/src/unstructured/ingest/runner/reddit.py b/src/unstructured/ingest/runner/reddit.py deleted file mode 100644 index 1ea5220..0000000 --- a/src/unstructured/ingest/runner/reddit.py +++ /dev/null @@ -1,51 +0,0 @@ -import hashlib -import logging -from typing import Optional - -from unstructured.ingest.interfaces import ProcessorConfigs, StandardConnectorConfig -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.processor import process_documents -from unstructured.ingest.runner.utils import update_download_dir_hash - - -def reddit( - verbose: bool, - connector_config: StandardConnectorConfig, - processor_config: ProcessorConfigs, - subreddit_name: str, - client_id: Optional[str], - client_secret: Optional[str], - user_agent: str, - search_query: Optional[str], - num_posts: int, - **kwargs, -): - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - - hashed_dir_name = hashlib.sha256( - subreddit_name.encode("utf-8"), - ) - connector_config.download_dir = update_download_dir_hash( - connector_config=connector_config, - hashed_dir_name=hashed_dir_name, - 
logger=logger, - ) - - from unstructured.ingest.connector.reddit import ( - RedditConnector, - SimpleRedditConfig, - ) - - doc_connector = RedditConnector( # type: ignore - standard_config=connector_config, - config=SimpleRedditConfig( - subreddit_name=subreddit_name, - client_id=client_id, - client_secret=client_secret, - user_agent=user_agent, - search_query=search_query, - num_posts=num_posts, - ), - ) - - process_documents(doc_connector=doc_connector, processor_config=processor_config) diff --git a/src/unstructured/ingest/runner/s3.py b/src/unstructured/ingest/runner/s3.py deleted file mode 100644 index f07a7ab..0000000 --- a/src/unstructured/ingest/runner/s3.py +++ /dev/null @@ -1,37 +0,0 @@ -import logging - -from unstructured.ingest.interfaces import ProcessorConfigs, StandardConnectorConfig -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.processor import process_documents -from unstructured.ingest.runner.utils import update_download_dir_remote_url - - -def s3( - verbose: bool, - connector_config: StandardConnectorConfig, - processor_config: ProcessorConfigs, - remote_url: str, - recursive: bool, - anonymous: bool, - **kwargs, -): - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - - connector_config.download_dir = update_download_dir_remote_url( - connector_config=connector_config, - remote_url=remote_url, - logger=logger, - ) - - from unstructured.ingest.connector.s3 import S3Connector, SimpleS3Config - - doc_connector = S3Connector( # type: ignore - standard_config=connector_config, - config=SimpleS3Config( - path=remote_url, - recursive=recursive, - access_kwargs={"anon": anonymous}, - ), - ) - - process_documents(doc_connector=doc_connector, processor_config=processor_config) diff --git a/src/unstructured/ingest/runner/sharepoint.py b/src/unstructured/ingest/runner/sharepoint.py deleted file mode 100644 index 6eb7c8e..0000000 --- a/src/unstructured/ingest/runner/sharepoint.py +++ /dev/null @@ -1,50 +0,0 @@ -import hashlib -import logging - -from unstructured.ingest.interfaces import ProcessorConfigs, StandardConnectorConfig -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.processor import process_documents -from unstructured.ingest.runner.utils import update_download_dir_hash - - -def sharepoint( - verbose: bool, - connector_config: StandardConnectorConfig, - processor_config: ProcessorConfigs, - site: str, - client_id: str, - client_cred: str, - files_only: bool, - path: str, - recursive: bool, - **kwargs, -): - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - - hashed_dir_name = hashlib.sha256( - f"{site}_{path}".encode("utf-8"), - ) - connector_config.download_dir = update_download_dir_hash( - connector_config=connector_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - from unstructured.ingest.connector.sharepoint import ( - SharepointConnector, - SimpleSharepointConfig, - ) - - doc_connector = SharepointConnector( # type: ignore - standard_config=connector_config, - config=SimpleSharepointConfig( - client_id=client_id, - client_credential=client_cred, - site_url=site, - path=path, - process_pages=(not files_only), - recursive=recursive, - ), - ) - - process_documents(doc_connector=doc_connector, processor_config=processor_config) diff --git a/src/unstructured/ingest/runner/slack.py b/src/unstructured/ingest/runner/slack.py deleted file mode 100644 index 0607eb0..0000000 --- 
a/src/unstructured/ingest/runner/slack.py +++ /dev/null @@ -1,48 +0,0 @@ -import hashlib -import logging -from typing import Optional - -from unstructured.ingest.interfaces import ProcessorConfigs, StandardConnectorConfig -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.processor import process_documents -from unstructured.ingest.runner.utils import update_download_dir_hash - - -def slack( - verbose: bool, - connector_config: StandardConnectorConfig, - processor_config: ProcessorConfigs, - channels: str, - token: str, - start_date: Optional[str], - end_date: Optional[str], - **kwargs, -): - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - - hashed_dir_name = hashlib.sha256( - channels.encode("utf-8"), - ) - connector_config.download_dir = update_download_dir_hash( - connector_config=connector_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - from unstructured.ingest.connector.slack import ( - SimpleSlackConfig, - SlackConnector, - ) - - doc_connector = SlackConnector( # type: ignore - standard_config=connector_config, - config=SimpleSlackConfig( - channels=SimpleSlackConfig.parse_channels(channels), - token=token, - oldest=start_date, - latest=end_date, - verbose=verbose, - ), - ) - - process_documents(doc_connector=doc_connector, processor_config=processor_config) diff --git a/src/unstructured/ingest/runner/utils.py b/src/unstructured/ingest/runner/utils.py deleted file mode 100644 index 9e59b38..0000000 --- a/src/unstructured/ingest/runner/utils.py +++ /dev/null @@ -1,43 +0,0 @@ -from __future__ import annotations - -import hashlib -import logging -from pathlib import Path - -from unstructured.ingest.interfaces import ( - StandardConnectorConfig, -) - - -def update_download_dir_remote_url( - connector_config: StandardConnectorConfig, - remote_url: str, - logger: logging.Logger, -) -> str: - hashed_dir_name = hashlib.sha256(remote_url.encode("utf-8")) - return update_download_dir_hash( - connector_config=connector_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - -def update_download_dir_hash( - connector_config: StandardConnectorConfig, - hashed_dir_name: hashlib._Hash, - logger: logging.Logger, -) -> str: - new_download_dir = connector_config.download_dir - if not connector_config.download_dir: - cache_path = Path.home() / ".cache" / "unstructured" / "ingest" - if not cache_path.exists(): - cache_path.mkdir(parents=True, exist_ok=True) - download_dir = cache_path / hashed_dir_name.hexdigest()[:10] - if connector_config.preserve_downloads: - logger.warning( - f"Preserving downloaded files but download_dir is not specified," - f" using {download_dir}", - ) - new_download_dir = str(download_dir) - logger.debug(f"updating download directory to: {new_download_dir}") - return new_download_dir diff --git a/src/unstructured/ingest/runner/wikipedia.py b/src/unstructured/ingest/runner/wikipedia.py deleted file mode 100644 index f5fce83..0000000 --- a/src/unstructured/ingest/runner/wikipedia.py +++ /dev/null @@ -1,42 +0,0 @@ -import hashlib -import logging - -from unstructured.ingest.interfaces import ProcessorConfigs, StandardConnectorConfig -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.processor import process_documents -from unstructured.ingest.runner.utils import update_download_dir_hash - - -def wikipedia( - verbose: bool, - connector_config: StandardConnectorConfig, - processor_config: ProcessorConfigs, - page_title: str, - 
auto_suggest: bool, - **kwargs, -): - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - - hashed_dir_name = hashlib.sha256( - page_title.encode("utf-8"), - ) - connector_config.download_dir = update_download_dir_hash( - connector_config=connector_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - from unstructured.ingest.connector.wikipedia import ( - SimpleWikipediaConfig, - WikipediaConnector, - ) - - doc_connector = WikipediaConnector( # type: ignore - standard_config=connector_config, - config=SimpleWikipediaConfig( - title=page_title, - auto_suggest=auto_suggest, - ), - ) - - process_documents(doc_connector=doc_connector, processor_config=processor_config) diff --git a/src/unstructured/nlp/__init__.py b/src/unstructured/nlp/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/unstructured/nlp/partition.py b/src/unstructured/nlp/partition.py deleted file mode 100644 index 3ffa991..0000000 --- a/src/unstructured/nlp/partition.py +++ /dev/null @@ -1,7 +0,0 @@ -# flake8: noqa -from unstructured.partition.pdf import partition_pdf -from unstructured.partition.text_type import ( - is_bulleted_text, - is_possible_narrative_text, - is_possible_title, -) diff --git a/src/unstructured/partition/__init__.py b/src/unstructured/partition/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/unstructured/staging/__init__.py b/src/unstructured/staging/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/test_image.py b/tests/test_image.py index 083f540..6f192c5 100644 --- a/tests/test_image.py +++ b/tests/test_image.py @@ -1,5 +1,5 @@ -from unstructured.documents.pdf_parser.image import ImageDocument -from unstructured.documents.html_utils import visualize_html, save_to_txt +from bisheng_unstructured.documents.pdf_parser.image import ImageDocument +from bisheng_unstructured.documents.html_utils import visualize_html, save_to_txt TEST_RT_URL = 'http://192.168.106.12:9001/v2.1/models/' diff --git a/tests/test_partition.py b/tests/test_partition.py index 4292287..117cae0 100644 --- a/tests/test_partition.py +++ b/tests/test_partition.py @@ -1,5 +1,5 @@ -from unstructured.partition.html import partition_html -from unstructured.documents.html_utils import visualize_html, save_to_txt +from bisheng_unstructured.partition.html import partition_html +from bisheng_unstructured.documents.html_utils import visualize_html, save_to_txt def test_html1(): diff --git a/tests/test_partition_image.py b/tests/test_partition_image.py index 809cfdb..7175e01 100644 --- a/tests/test_partition_image.py +++ b/tests/test_partition_image.py @@ -1,5 +1,5 @@ -from unstructured.partition.image import partition_image -from unstructured.documents.markdown import ( +from bisheng_unstructured.partition.image import partition_image +from bisheng_unstructured.documents.markdown import ( transform_html_table_to_md, merge_html_tables) @@ -42,12 +42,18 @@ def test3(): def test4(): html_text = """ - - - - - - + + + + + + + + + + + +
DatasetBase Model|Large Model|Notes
PubLayNet[38]F/MMLayouts of modern scientific documents
PRImA [3]M:Layouts of scanned modern magaxines and sciertific reports
AALayouts of scanned US newspapers from the 20th century
TableBank[18]Table region on modern scientific and business document
HJDataset [31]F/MLayouts of history Japanese documents
DatasetBase Model|Large Model|Notes
PubLayNet[38]F/MMLayouts of modern scientific documents
PRImA [3]M:Layouts of scanned modern magaxines and sciertific reports
AALayouts of scanned US newspapers from the 20th century
TableBank[18]Table region on modern scientific and business document
HJDataset [31]F/MLayouts of history Japanese documents
""" @@ -58,23 +64,35 @@ def test4(): def test5(): html_text1 = """ - - - - - - + + + + + + + + + + + +
DatasetBase Model|Large Model|Notes
PubLayNet[38]F/MMLayouts of modern scientific documents
PRImA [3]M:Layouts of scanned modern magaxines and sciertific reports
AALayouts of scanned US newspapers from the 20th century
TableBank[18]Table region on modern scientific and business document
HJDataset [31]F/MLayouts of history Japanese documents
DatasetBase Model|Large Model|Notes
PubLayNet[38]F/MMLayouts of modern scientific documents
PRImA [3]M:Layouts of scanned modern magaxines and sciertific reports
AALayouts of scanned US newspapers from the 20th century
TableBank[18]Table region on modern scientific and business document
HJDataset [31]F/MLayouts of history Japanese documents
""" html_text2 = """ - - - - - - + + + + + + + + + + + +
DatasetBase Model|Large Model|Notes
PubLayNet[38]F/MMLayouts of modern scientific documents
PRImA [3]M:Layouts of scanned modern magaxines and sciertific reports
AALayouts of scanned US newspapers from the 20th century
TableBank[18]Table region on modern scientific and business document
HJDataset [31]F/MLayouts of history Japanese documents
DatasetBase Model|Large Model|Notes
PubLayNet[38]F/MMLayouts of modern scientific documents
PRImA [3]M:Layouts of scanned modern magaxines and sciertific reports
AALayouts of scanned US newspapers from the 20th century
TableBank[18]Table region on modern scientific and business document
HJDataset [31]F/MLayouts of history Japanese documents
""" diff --git a/tests/test_pdf.py b/tests/test_pdf.py index 203ae80..a896772 100644 --- a/tests/test_pdf.py +++ b/tests/test_pdf.py @@ -1,5 +1,5 @@ -from unstructured.partition.pdf import partition_pdf -from unstructured.documents.html_utils import visualize_html, save_to_txt +from bisheng_unstructured.partition.pdf import partition_pdf +from bisheng_unstructured.documents.html_utils import visualize_html, save_to_txt def test1(): diff --git a/tests/test_pdf_parser.py b/tests/test_pdf_parser.py index 284f188..2826f63 100644 --- a/tests/test_pdf_parser.py +++ b/tests/test_pdf_parser.py @@ -1,8 +1,9 @@ -from unstructured.documents.pdf_parser.pdf import PDFDocument -from unstructured.documents.html_utils import visualize_html, save_to_txt +from bisheng_unstructured.documents.pdf_parser.pdf import PDFDocument +from bisheng_unstructured.documents.html_utils import visualize_html, save_to_txt TEST_RT_URL = 'http://192.168.106.12:9001/v2.1/models/' + def test_pdf_doc(): url = TEST_RT_URL layout_ep = url + 'elem_layout_v1/infer' @@ -10,7 +11,6 @@ def test_pdf_doc(): rowcol_model_ep = url + 'elem_table_rowcol_detect_v1/infer' table_model_ep = url + 'elem_table_detect_v1/infer' - model_params = { 'layout_ep': layout_ep, 'cell_model_ep': cell_model_ep, @@ -37,7 +37,6 @@ def test_pdf_doc2(): rowcol_model_ep = url + 'elem_table_rowcol_detect_v1/infer' table_model_ep = url + 'elem_table_detect_v1/infer' - model_params = { 'layout_ep': layout_ep, 'cell_model_ep': cell_model_ep, @@ -70,7 +69,6 @@ def test_pdf_doc3(): 'table_model_ep': table_model_ep, } - filename = "examples/docs/sw-flp-1965-v1.pdf" pdf_doc = PDFDocument( file=filename,