From 305247b4e1a67b218c624357ad8427dd7d19bf0c Mon Sep 17 00:00:00 2001
From: Steve Canny <stcanny@gmail.com>
Date: Sat, 20 Apr 2024 20:08:20 -0700
Subject: [PATCH 1/2] chore: bump unstructured-inference pin (#2913)

**Summary**
Update dependencies to use the new version of `unstructured-inference`
released yesterday. Remedy a few small problems with `make pip-compile`
that stood in the way.
---
 CHANGELOG.md                              |  2 +-
 requirements/Makefile                     |  4 ++--
 requirements/base.txt                     |  2 +-
 requirements/deps/constraints.txt         |  3 ---
 requirements/dev.txt                      | 14 ++++++-------
 requirements/extra-paddleocr.txt          |  6 +++---
 requirements/extra-pdf-image.in           |  2 +-
 requirements/extra-pdf-image.txt          | 19 ++++++++----------
 requirements/huggingface.txt              | 12 +++++-------
 requirements/ingest/astra.txt             |  8 +++++---
 requirements/ingest/azure.txt             |  8 +++++---
 requirements/ingest/box.txt               |  4 +++-
 requirements/ingest/chroma.txt            | 20 +++++++++----------
 requirements/ingest/clarifai.txt          |  4 ++--
 requirements/ingest/delta-table.txt       |  2 +-
 requirements/ingest/discord.txt           |  2 +-
 requirements/ingest/embed-aws-bedrock.txt | 10 +++++-----
 requirements/ingest/embed-huggingface.txt | 24 +++++++++++------------
 requirements/ingest/embed-octoai.txt      |  6 +++---
 requirements/ingest/embed-openai.txt      | 16 +++++++--------
 requirements/ingest/embed-vertexai.txt    | 23 +++++++++++-----------
 requirements/ingest/gcs.txt               |  2 +-
 requirements/ingest/github.txt            |  4 +++-
 requirements/ingest/google-drive.txt      |  2 +-
 requirements/ingest/notion.txt            |  2 +-
 requirements/ingest/onedrive.txt          |  4 +++-
 requirements/ingest/outlook.txt           |  4 +++-
 requirements/ingest/qdrant.txt            | 10 ++++++----
 requirements/ingest/s3.txt                |  2 +-
 requirements/ingest/salesforce.txt        | 22 +++++++--------------
 requirements/ingest/sharepoint.txt        |  4 +++-
 requirements/ingest/weaviate.txt          |  8 ++++----
 requirements/test.txt                     | 10 +++++-----
 unstructured/__version__.py               |  2 +-
 34 files changed, 133 insertions(+), 134 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index f3080a3c43..ab0f43591b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,4 @@
-## 0.13.3-dev9
+## 0.13.3
 
 ### Enhancements
 
diff --git a/requirements/Makefile b/requirements/Makefile
index 2a6eee56d0..9c4175401f 100644
--- a/requirements/Makefile
+++ b/requirements/Makefile
@@ -24,11 +24,11 @@ compile-base:
 
 .PHONY: compile-all-base
 compile-all-base: compile-base compile-test compile-dev
-	@$(foreach file,$(BASE_REQUIREMENTS),echo "compiling: $(file)" && pip-compile --upgrade $(file);)
+	@$(foreach file,$(BASE_REQUIREMENTS),printf '\n\ncompiling: %s\n' "$(file)" && pip-compile --no-strip-extras --upgrade $(file) || exit;)
 
 .PHONY: compile-ingest
 compile-ingest:
-	@$(foreach file,$(INGEST_REQUIREMENTS),echo "compiling: $(file)" && pip-compile --upgrade $(file);)
+	@$(foreach file,$(INGEST_REQUIREMENTS),printf '\n\ncompiling: %s\n' "$(file)" && pip-compile --no-strip-extras --upgrade $(file) || exit;)
 
 .PHONY: clean
 clean: clean-base clean-ingest
diff --git a/requirements/base.txt b/requirements/base.txt
index 727efcbf8e..87ae0d05fa 100644
--- a/requirements/base.txt
+++ b/requirements/base.txt
@@ -69,7 +69,7 @@ python-magic==0.4.27
     # via -r ./base.in
 rapidfuzz==3.8.1
     # via -r ./base.in
-regex==2023.12.25
+regex==2024.4.16
     # via nltk
 requests==2.31.0
     # via
diff --git a/requirements/deps/constraints.txt b/requirements/deps/constraints.txt
index 18df4c7460..de28fc6357 100644
--- a/requirements/deps/constraints.txt
+++ b/requirements/deps/constraints.txt
@@ -22,8 +22,6 @@ Office365-REST-Python-Client<2.4.3
 # unstructured-inference to be upgraded when unstructured library is upgraded
 # https://github.com/Unstructured-IO/unstructured/issues/1458
 # unstructured-inference
-# unable to build wheel for arm on 0.3.3+
-safetensors<=0.3.2
 # use the known compatible version of weaviate and unstructured.pytesseract
 unstructured.pytesseract>=0.3.12
 weaviate-client>3.25.0
@@ -38,7 +36,6 @@ torch>2
 # pinned in unstructured paddleocr
 opencv-python==4.8.0.76
 opencv-contrib-python==4.8.0.76
-onnxruntime==1.15.1
 platformdirs==3.10.0
 
 # TODO: Constraint due to langchain, remove when that gets updated:
diff --git a/requirements/dev.txt b/requirements/dev.txt
index 4fe5b454bc..477172e78f 100644
--- a/requirements/dev.txt
+++ b/requirements/dev.txt
@@ -73,7 +73,7 @@ defusedxml==0.7.1
     # via nbconvert
 distlib==0.3.8
     # via virtualenv
-exceptiongroup==1.2.0
+exceptiongroup==1.2.1
     # via
     #   -c ./test.txt
     #   anyio
@@ -91,7 +91,7 @@ httpcore==1.0.5
     # via httpx
 httpx==0.27.0
     # via jupyterlab
-identify==2.5.35
+identify==2.5.36
     # via pre-commit
 idna==3.7
     # via
@@ -195,7 +195,7 @@ markupsafe==2.1.5
     # via
     #   jinja2
     #   nbconvert
-matplotlib-inline==0.1.6
+matplotlib-inline==0.1.7
     # via
     #   ipykernel
     #   ipython
@@ -216,7 +216,7 @@ nest-asyncio==1.6.0
     # via ipykernel
 nodeenv==1.8.0
     # via pre-commit
-notebook==7.1.2
+notebook==7.1.3
     # via jupyter
 notebook-shim==0.2.4
     # via
@@ -294,7 +294,7 @@ pyyaml==6.0.1
     #   -c ./test.txt
     #   jupyter-events
     #   pre-commit
-pyzmq==25.1.2
+pyzmq==26.0.2
     # via
     #   ipykernel
     #   jupyter-client
@@ -368,7 +368,7 @@ tornado==6.4
     #   jupyterlab
     #   notebook
     #   terminado
-traitlets==5.14.2
+traitlets==5.14.3
     # via
     #   comm
     #   ipykernel
@@ -401,7 +401,7 @@ urllib3==1.26.18
     #   -c ./base.txt
     #   -c ./test.txt
     #   requests
-virtualenv==20.25.1
+virtualenv==20.25.3
     # via pre-commit
 wcwidth==0.2.13
     # via prompt-toolkit
diff --git a/requirements/extra-paddleocr.txt b/requirements/extra-paddleocr.txt
index d93facdad5..114316e73f 100644
--- a/requirements/extra-paddleocr.txt
+++ b/requirements/extra-paddleocr.txt
@@ -63,7 +63,7 @@ importlib-metadata==7.1.0
     # via flask
 importlib-resources==6.4.0
     # via matplotlib
-itsdangerous==2.1.2
+itsdangerous==2.2.0
     # via flask
 jinja2==3.1.3
     # via
@@ -188,7 +188,7 @@ scipy==1.10.1
     #   -c ././deps/constraints.txt
     #   imgaug
     #   scikit-image
-shapely==2.0.3
+shapely==2.0.4
     # via
     #   imgaug
     #   unstructured-paddleocr
@@ -200,7 +200,7 @@ six==1.16.0
     #   imgaug
     #   python-dateutil
     #   visualdl
-tifffile==2024.2.12
+tifffile==2024.4.18
     # via scikit-image
 tqdm==4.66.2
     # via
diff --git a/requirements/extra-pdf-image.in b/requirements/extra-pdf-image.in
index 208569c43e..f6e003d1a3 100644
--- a/requirements/extra-pdf-image.in
+++ b/requirements/extra-pdf-image.in
@@ -9,7 +9,7 @@ pillow_heif
 pypdf
 # Do not move to constraints.in, otherwise unstructured-inference will not be upgraded
 # when unstructured library is.
-unstructured-inference==0.7.25
+unstructured-inference==0.7.27
 # unstructured fork of pytesseract that provides an interface to allow for multiple output formats
 # from one tesseract call
 unstructured.pytesseract>=0.3.12
diff --git a/requirements/extra-pdf-image.txt b/requirements/extra-pdf-image.txt
index 53dc5e40d5..2d902f0194 100644
--- a/requirements/extra-pdf-image.txt
+++ b/requirements/extra-pdf-image.txt
@@ -100,10 +100,8 @@ onnx==1.16.0
     # via
     #   -r ./extra-pdf-image.in
     #   unstructured-inference
-onnxruntime==1.15.1
-    # via
-    #   -c ././deps/constraints.txt
-    #   unstructured-inference
+onnxruntime==1.17.3
+    # via unstructured-inference
 opencv-python==4.8.0.76
     # via
     #   -c ././deps/constraints.txt
@@ -132,7 +130,7 @@ pdfminer-six==20231228
     #   pdfplumber
 pdfplumber==0.11.0
     # via layoutparser
-pikepdf==8.15.0
+pikepdf==8.15.1
     # via -r ./extra-pdf-image.in
 pillow==10.3.0
     # via
@@ -190,7 +188,7 @@ rapidfuzz==3.8.1
     # via
     #   -c ./base.txt
     #   unstructured-inference
-regex==2023.12.25
+regex==2024.4.16
     # via
     #   -c ./base.txt
     #   transformers
@@ -199,9 +197,8 @@ requests==2.31.0
     #   -c ./base.txt
     #   huggingface-hub
     #   transformers
-safetensors==0.3.2
+safetensors==0.4.3
     # via
-    #   -c ././deps/constraints.txt
     #   timm
     #   transformers
 scipy==1.10.1
@@ -218,7 +215,7 @@ sympy==1.12
     #   torch
 timm==0.9.16
     # via effdet
-tokenizers==0.15.2
+tokenizers==0.19.1
     # via transformers
 torch==2.2.2
     # via
@@ -238,7 +235,7 @@ tqdm==4.66.2
     #   huggingface-hub
     #   iopath
     #   transformers
-transformers==4.37.1
+transformers==4.40.0
     # via unstructured-inference
 typing-extensions==4.11.0
     # via
@@ -249,7 +246,7 @@ typing-extensions==4.11.0
     #   torch
 tzdata==2024.1
     # via pandas
-unstructured-inference==0.7.25
+unstructured-inference==0.7.27
     # via -r ./extra-pdf-image.in
 unstructured-pytesseract==0.3.12
     # via
diff --git a/requirements/huggingface.txt b/requirements/huggingface.txt
index 9b58d6197f..73e03b0337 100644
--- a/requirements/huggingface.txt
+++ b/requirements/huggingface.txt
@@ -64,7 +64,7 @@ pyyaml==6.0.1
     # via
     #   huggingface-hub
     #   transformers
-regex==2023.12.25
+regex==2024.4.16
     # via
     #   -c ./base.txt
     #   sacremoses
@@ -76,10 +76,8 @@ requests==2.31.0
     #   transformers
 sacremoses==0.1.1
     # via -r ./huggingface.in
-safetensors==0.3.2
-    # via
-    #   -c ././deps/constraints.txt
-    #   transformers
+safetensors==0.4.3
+    # via transformers
 sentencepiece==0.2.0
     # via -r ./huggingface.in
 six==1.16.0
@@ -88,7 +86,7 @@ six==1.16.0
     #   langdetect
 sympy==1.12
     # via torch
-tokenizers==0.15.2
+tokenizers==0.19.1
     # via transformers
 torch==2.2.2
     # via
@@ -100,7 +98,7 @@ tqdm==4.66.2
     #   huggingface-hub
     #   sacremoses
     #   transformers
-transformers==4.37.1
+transformers==4.40.0
     # via -r ./huggingface.in
 typing-extensions==4.11.0
     # via
diff --git a/requirements/ingest/astra.txt b/requirements/ingest/astra.txt
index 184d4422f8..0e8c50605b 100644
--- a/requirements/ingest/astra.txt
+++ b/requirements/ingest/astra.txt
@@ -14,7 +14,7 @@ bson==0.5.10
     # via astrapy
 cassandra-driver==3.29.1
     # via cassio
-cassio==0.1.5
+cassio==0.1.6
     # via astrapy
 certifi==2024.2.2
     # via
@@ -33,7 +33,7 @@ click==8.1.7
     #   geomet
 deprecation==2.1.0
     # via astrapy
-exceptiongroup==1.2.0
+exceptiongroup==1.2.1
     # via anyio
 geomet==0.2.1.post1
     # via cassandra-driver
@@ -46,7 +46,9 @@ hpack==4.0.0
 httpcore==1.0.5
     # via httpx
 httpx[http2]==0.27.0
-    # via astrapy
+    # via
+    #   astrapy
+    #   httpx
 hyperframe==6.0.1
     # via h2
 idna==3.7
diff --git a/requirements/ingest/azure.txt b/requirements/ingest/azure.txt
index 5aebf17a27..2c48b6950e 100644
--- a/requirements/ingest/azure.txt
+++ b/requirements/ingest/azure.txt
@@ -4,9 +4,9 @@
 #
 #    pip-compile ./ingest/azure.in
 #
-adlfs==2024.2.0
+adlfs==2024.4.1
     # via -r ./ingest/azure.in
-aiohttp==3.9.4
+aiohttp==3.9.5
     # via adlfs
 aiosignal==1.3.1
     # via aiohttp
@@ -80,7 +80,9 @@ portalocker==2.8.2
 pycparser==2.22
     # via cffi
 pyjwt[crypto]==2.8.0
-    # via msal
+    # via
+    #   msal
+    #   pyjwt
 requests==2.31.0
     # via
     #   -c ./ingest/../base.txt
diff --git a/requirements/ingest/box.txt b/requirements/ingest/box.txt
index 80244e0885..2f3c8980ab 100644
--- a/requirements/ingest/box.txt
+++ b/requirements/ingest/box.txt
@@ -9,7 +9,9 @@ attrs==23.2.0
 boxfs==0.3.0
     # via -r ./ingest/box.in
 boxsdk[jwt]==3.9.2
-    # via boxfs
+    # via
+    #   boxfs
+    #   boxsdk
 certifi==2024.2.2
     # via
     #   -c ./ingest/../base.txt
diff --git a/requirements/ingest/chroma.txt b/requirements/ingest/chroma.txt
index 25a35a2941..d4acacdc3a 100644
--- a/requirements/ingest/chroma.txt
+++ b/requirements/ingest/chroma.txt
@@ -44,9 +44,9 @@ coloredlogs==15.0.1
     # via onnxruntime
 deprecated==1.2.14
     # via opentelemetry-api
-exceptiongroup==1.2.0
+exceptiongroup==1.2.1
     # via anyio
-fastapi==0.110.1
+fastapi==0.110.2
     # via chromadb
 filelock==3.13.4
     # via huggingface-hub
@@ -58,7 +58,7 @@ google-auth==2.29.0
     # via kubernetes
 googleapis-common-protos==1.63.0
     # via opentelemetry-exporter-otlp-proto-grpc
-grpcio==1.62.1
+grpcio==1.62.2
     # via
     #   chromadb
     #   opentelemetry-exporter-otlp-proto-grpc
@@ -95,10 +95,8 @@ oauthlib==3.2.2
     # via
     #   kubernetes
     #   requests-oauthlib
-onnxruntime==1.15.1
-    # via
-    #   -c ./ingest/../deps/constraints.txt
-    #   chromadb
+onnxruntime==1.17.3
+    # via chromadb
 opentelemetry-api==1.16.0
     # via
     #   chromadb
@@ -131,7 +129,7 @@ protobuf==4.23.4
     #   googleapis-common-protos
     #   onnxruntime
     #   opentelemetry-proto
-pulsar-client==3.4.0
+pulsar-client==3.5.0
     # via chromadb
 pyasn1==0.6.0
     # via
@@ -186,7 +184,7 @@ sympy==1.12
     # via onnxruntime
 tenacity==8.2.3
     # via chromadb
-tokenizers==0.15.2
+tokenizers==0.19.1
     # via chromadb
 tqdm==4.66.2
     # via
@@ -216,7 +214,9 @@ urllib3==1.26.18
     #   kubernetes
     #   requests
 uvicorn[standard]==0.29.0
-    # via chromadb
+    # via
+    #   chromadb
+    #   uvicorn
 uvloop==0.19.0
     # via uvicorn
 watchfiles==0.21.0
diff --git a/requirements/ingest/clarifai.txt b/requirements/ingest/clarifai.txt
index e58f80efc8..374b977da0 100644
--- a/requirements/ingest/clarifai.txt
+++ b/requirements/ingest/clarifai.txt
@@ -13,7 +13,7 @@ charset-normalizer==3.3.2
     # via
     #   -c ./ingest/../base.txt
     #   requests
-clarifai==10.3.0
+clarifai==10.3.1
     # via -r ./ingest/clarifai.in
 clarifai-grpc==10.2.3
     # via clarifai
@@ -21,7 +21,7 @@ contextlib2==21.6.0
     # via schema
 googleapis-common-protos==1.63.0
     # via clarifai-grpc
-grpcio==1.62.1
+grpcio==1.62.2
     # via clarifai-grpc
 idna==3.7
     # via
diff --git a/requirements/ingest/delta-table.txt b/requirements/ingest/delta-table.txt
index c6bea36584..1053728f11 100644
--- a/requirements/ingest/delta-table.txt
+++ b/requirements/ingest/delta-table.txt
@@ -12,7 +12,7 @@ numpy==1.26.4
     # via
     #   -c ./ingest/../base.txt
     #   pyarrow
-pyarrow==15.0.2
+pyarrow==16.0.0
     # via deltalake
 pyarrow-hotfix==0.6
     # via deltalake
diff --git a/requirements/ingest/discord.txt b/requirements/ingest/discord.txt
index 5ac3a1937e..5187978136 100644
--- a/requirements/ingest/discord.txt
+++ b/requirements/ingest/discord.txt
@@ -4,7 +4,7 @@
 #
 #    pip-compile ./ingest/discord.in
 #
-aiohttp==3.9.4
+aiohttp==3.9.5
     # via discord-py
 aiosignal==1.3.1
     # via aiohttp
diff --git a/requirements/ingest/embed-aws-bedrock.txt b/requirements/ingest/embed-aws-bedrock.txt
index a407d81f93..31052577e9 100644
--- a/requirements/ingest/embed-aws-bedrock.txt
+++ b/requirements/ingest/embed-aws-bedrock.txt
@@ -4,7 +4,7 @@
 #
 #    pip-compile ./ingest/embed-aws-bedrock.in
 #
-aiohttp==3.9.4
+aiohttp==3.9.5
     # via langchain-community
 aiosignal==1.3.1
     # via aiohttp
@@ -51,11 +51,11 @@ jsonpatch==1.33
     # via langchain-core
 jsonpointer==2.4
     # via jsonpatch
-langchain-community==0.0.32
+langchain-community==0.0.34
     # via -r ./ingest/embed-aws-bedrock.in
-langchain-core==0.1.42
+langchain-core==0.1.45
     # via langchain-community
-langsmith==0.1.46
+langsmith==0.1.49
     # via
     #   langchain-community
     #   langchain-core
@@ -75,7 +75,7 @@ numpy==1.26.4
     # via
     #   -c ./ingest/../base.txt
     #   langchain-community
-orjson==3.10.0
+orjson==3.10.1
     # via langsmith
 packaging==23.2
     # via
diff --git a/requirements/ingest/embed-huggingface.txt b/requirements/ingest/embed-huggingface.txt
index bbdfee7bd9..ce967ba576 100644
--- a/requirements/ingest/embed-huggingface.txt
+++ b/requirements/ingest/embed-huggingface.txt
@@ -4,7 +4,7 @@
 #
 #    pip-compile ./ingest/embed-huggingface.in
 #
-aiohttp==3.9.4
+aiohttp==3.9.5
     # via langchain-community
 aiosignal==1.3.1
     # via aiohttp
@@ -62,11 +62,11 @@ jsonpatch==1.33
     # via langchain-core
 jsonpointer==2.4
     # via jsonpatch
-langchain-community==0.0.32
+langchain-community==0.0.34
     # via -r ./ingest/embed-huggingface.in
-langchain-core==0.1.42
+langchain-core==0.1.45
     # via langchain-community
-langsmith==0.1.46
+langsmith==0.1.49
     # via
     #   langchain-community
     #   langchain-core
@@ -96,7 +96,7 @@ numpy==1.26.4
     #   scipy
     #   sentence-transformers
     #   transformers
-orjson==3.10.0
+orjson==3.10.1
     # via langsmith
 packaging==23.2
     # via
@@ -120,7 +120,7 @@ pyyaml==6.0.1
     #   langchain-community
     #   langchain-core
     #   transformers
-regex==2023.12.25
+regex==2024.4.16
     # via
     #   -c ./ingest/../base.txt
     #   transformers
@@ -131,10 +131,8 @@ requests==2.31.0
     #   langchain-community
     #   langsmith
     #   transformers
-safetensors==0.3.2
-    # via
-    #   -c ./ingest/../deps/constraints.txt
-    #   transformers
+safetensors==0.4.3
+    # via transformers
 scikit-learn==1.4.2
     # via sentence-transformers
 scipy==1.10.1
@@ -142,7 +140,7 @@ scipy==1.10.1
     #   -c ./ingest/../deps/constraints.txt
     #   scikit-learn
     #   sentence-transformers
-sentence-transformers==2.6.1
+sentence-transformers==2.7.0
     # via -r ./ingest/embed-huggingface.in
 sqlalchemy==2.0.29
     # via langchain-community
@@ -154,7 +152,7 @@ tenacity==8.2.3
     #   langchain-core
 threadpoolctl==3.4.0
     # via scikit-learn
-tokenizers==0.15.2
+tokenizers==0.19.1
     # via transformers
 torch==2.2.2
     # via
@@ -166,7 +164,7 @@ tqdm==4.66.2
     #   huggingface-hub
     #   sentence-transformers
     #   transformers
-transformers==4.37.1
+transformers==4.40.0
     # via sentence-transformers
 typing-extensions==4.11.0
     # via
diff --git a/requirements/ingest/embed-octoai.txt b/requirements/ingest/embed-octoai.txt
index f8a265c61c..d43b301b14 100644
--- a/requirements/ingest/embed-octoai.txt
+++ b/requirements/ingest/embed-octoai.txt
@@ -24,7 +24,7 @@ charset-normalizer==3.3.2
     #   requests
 distro==1.9.0
     # via openai
-exceptiongroup==1.2.0
+exceptiongroup==1.2.1
     # via anyio
 h11==0.14.0
     # via httpcore
@@ -38,13 +38,13 @@ idna==3.7
     #   anyio
     #   httpx
     #   requests
-openai==1.17.0
+openai==1.23.2
     # via -r ./ingest/embed-octoai.in
 pydantic==2.7.0
     # via openai
 pydantic-core==2.18.1
     # via pydantic
-regex==2023.12.25
+regex==2024.4.16
     # via
     #   -c ./ingest/../base.txt
     #   tiktoken
diff --git a/requirements/ingest/embed-openai.txt b/requirements/ingest/embed-openai.txt
index 13cd1a6802..dae330e673 100644
--- a/requirements/ingest/embed-openai.txt
+++ b/requirements/ingest/embed-openai.txt
@@ -4,7 +4,7 @@
 #
 #    pip-compile ./ingest/embed-openai.in
 #
-aiohttp==3.9.4
+aiohttp==3.9.5
     # via langchain-community
 aiosignal==1.3.1
     # via aiohttp
@@ -36,7 +36,7 @@ dataclasses-json==0.6.4
     #   langchain-community
 distro==1.9.0
     # via openai
-exceptiongroup==1.2.0
+exceptiongroup==1.2.1
     # via anyio
 frozenlist==1.4.1
     # via
@@ -59,11 +59,11 @@ jsonpatch==1.33
     # via langchain-core
 jsonpointer==2.4
     # via jsonpatch
-langchain-community==0.0.32
+langchain-community==0.0.34
     # via -r ./ingest/embed-openai.in
-langchain-core==0.1.42
+langchain-core==0.1.45
     # via langchain-community
-langsmith==0.1.46
+langsmith==0.1.49
     # via
     #   langchain-community
     #   langchain-core
@@ -83,9 +83,9 @@ numpy==1.26.4
     # via
     #   -c ./ingest/../base.txt
     #   langchain-community
-openai==1.17.0
+openai==1.23.2
     # via -r ./ingest/embed-openai.in
-orjson==3.10.0
+orjson==3.10.1
     # via langsmith
 packaging==23.2
     # via
@@ -104,7 +104,7 @@ pyyaml==6.0.1
     # via
     #   langchain-community
     #   langchain-core
-regex==2023.12.25
+regex==2024.4.16
     # via
     #   -c ./ingest/../base.txt
     #   tiktoken
diff --git a/requirements/ingest/embed-vertexai.txt b/requirements/ingest/embed-vertexai.txt
index c5442f9ed4..39aad94d13 100644
--- a/requirements/ingest/embed-vertexai.txt
+++ b/requirements/ingest/embed-vertexai.txt
@@ -4,7 +4,7 @@
 #
 #    pip-compile ./ingest/embed-vertexai.in
 #
-aiohttp==3.9.4
+aiohttp==3.9.5
     # via
     #   langchain
     #   langchain-community
@@ -42,6 +42,7 @@ frozenlist==1.4.1
     #   aiosignal
 google-api-core[grpc]==2.18.0
     # via
+    #   google-api-core
     #   google-cloud-aiplatform
     #   google-cloud-bigquery
     #   google-cloud-core
@@ -55,9 +56,9 @@ google-auth==2.29.0
     #   google-cloud-core
     #   google-cloud-resource-manager
     #   google-cloud-storage
-google-cloud-aiplatform==1.47.0
+google-cloud-aiplatform==1.48.0
     # via langchain-google-vertexai
-google-cloud-bigquery==3.20.1
+google-cloud-bigquery==3.21.0
     # via google-cloud-aiplatform
 google-cloud-core==2.4.1
     # via
@@ -84,13 +85,13 @@ googleapis-common-protos[grpc]==1.63.0
     #   grpcio-status
 grpc-google-iam-v1==0.13.0
     # via google-cloud-resource-manager
-grpcio==1.62.1
+grpcio==1.62.2
     # via
     #   google-api-core
     #   googleapis-common-protos
     #   grpc-google-iam-v1
     #   grpcio-status
-grpcio-status==1.62.1
+grpcio-status==1.62.2
     # via google-api-core
 idna==3.7
     # via
@@ -105,11 +106,11 @@ jsonpointer==2.4
     # via jsonpatch
 langchain==0.1.16
     # via -r ./ingest/embed-vertexai.in
-langchain-community==0.0.32
+langchain-community==0.0.34
     # via
     #   -r ./ingest/embed-vertexai.in
     #   langchain
-langchain-core==0.1.42
+langchain-core==0.1.45
     # via
     #   langchain
     #   langchain-community
@@ -119,7 +120,7 @@ langchain-google-vertexai==1.0.1
     # via -r ./ingest/embed-vertexai.in
 langchain-text-splitters==0.0.1
     # via langchain
-langsmith==0.1.46
+langsmith==0.1.49
     # via
     #   langchain
     #   langchain-community
@@ -142,7 +143,7 @@ numpy==1.26.4
     #   langchain
     #   langchain-community
     #   shapely
-orjson==3.10.0
+orjson==3.10.1
     # via langsmith
 packaging==23.2
     # via
@@ -201,7 +202,7 @@ requests==2.31.0
     #   langsmith
 rsa==4.9
     # via google-auth
-shapely==2.0.3
+shapely==2.0.4
     # via google-cloud-aiplatform
 six==1.16.0
     # via
@@ -216,7 +217,7 @@ tenacity==8.2.3
     #   langchain
     #   langchain-community
     #   langchain-core
-types-protobuf==4.25.0.20240410
+types-protobuf==4.25.0.20240417
     # via langchain-google-vertexai
 types-requests==2.31.0.6
     # via langchain-google-vertexai
diff --git a/requirements/ingest/gcs.txt b/requirements/ingest/gcs.txt
index 93cd3360e2..48c5c7f6d2 100644
--- a/requirements/ingest/gcs.txt
+++ b/requirements/ingest/gcs.txt
@@ -4,7 +4,7 @@
 #
 #    pip-compile ./ingest/gcs.in
 #
-aiohttp==3.9.4
+aiohttp==3.9.5
     # via gcsfs
 aiosignal==1.3.1
     # via aiohttp
diff --git a/requirements/ingest/github.txt b/requirements/ingest/github.txt
index ed8ec5fdb1..18e29fc3c4 100644
--- a/requirements/ingest/github.txt
+++ b/requirements/ingest/github.txt
@@ -30,7 +30,9 @@ pycparser==2.22
 pygithub==2.3.0
     # via -r ./ingest/github.in
 pyjwt[crypto]==2.8.0
-    # via pygithub
+    # via
+    #   pygithub
+    #   pyjwt
 pynacl==1.5.0
     # via pygithub
 requests==2.31.0
diff --git a/requirements/ingest/google-drive.txt b/requirements/ingest/google-drive.txt
index 297374a0f8..86aa1c2df7 100644
--- a/requirements/ingest/google-drive.txt
+++ b/requirements/ingest/google-drive.txt
@@ -17,7 +17,7 @@ charset-normalizer==3.3.2
     #   requests
 google-api-core==2.18.0
     # via google-api-python-client
-google-api-python-client==2.125.0
+google-api-python-client==2.126.0
     # via -r ./ingest/google-drive.in
 google-auth==2.29.0
     # via
diff --git a/requirements/ingest/notion.txt b/requirements/ingest/notion.txt
index cb19ab6d29..fcc3ac65c5 100644
--- a/requirements/ingest/notion.txt
+++ b/requirements/ingest/notion.txt
@@ -14,7 +14,7 @@ certifi==2024.2.2
     #   -c ./ingest/../deps/constraints.txt
     #   httpcore
     #   httpx
-exceptiongroup==1.2.0
+exceptiongroup==1.2.1
     # via anyio
 h11==0.14.0
     # via httpcore
diff --git a/requirements/ingest/onedrive.txt b/requirements/ingest/onedrive.txt
index ced2374b7d..8922ec418c 100644
--- a/requirements/ingest/onedrive.txt
+++ b/requirements/ingest/onedrive.txt
@@ -40,7 +40,9 @@ office365-rest-python-client==2.4.2
 pycparser==2.22
     # via cffi
 pyjwt[crypto]==2.8.0
-    # via msal
+    # via
+    #   msal
+    #   pyjwt
 pytz==2024.1
     # via office365-rest-python-client
 requests==2.31.0
diff --git a/requirements/ingest/outlook.txt b/requirements/ingest/outlook.txt
index 9a6ecbe3e4..2129b31be5 100644
--- a/requirements/ingest/outlook.txt
+++ b/requirements/ingest/outlook.txt
@@ -34,7 +34,9 @@ office365-rest-python-client==2.4.2
 pycparser==2.22
     # via cffi
 pyjwt[crypto]==2.8.0
-    # via msal
+    # via
+    #   msal
+    #   pyjwt
 pytz==2024.1
     # via office365-rest-python-client
 requests==2.31.0
diff --git a/requirements/ingest/qdrant.txt b/requirements/ingest/qdrant.txt
index 277a5ad599..41b0c25d26 100644
--- a/requirements/ingest/qdrant.txt
+++ b/requirements/ingest/qdrant.txt
@@ -16,13 +16,13 @@ certifi==2024.2.2
     #   -c ./ingest/../deps/constraints.txt
     #   httpcore
     #   httpx
-exceptiongroup==1.2.0
+exceptiongroup==1.2.1
     # via anyio
-grpcio==1.62.1
+grpcio==1.62.2
     # via
     #   grpcio-tools
     #   qdrant-client
-grpcio-tools==1.62.1
+grpcio-tools==1.62.2
     # via qdrant-client
 h11==0.14.0
     # via httpcore
@@ -33,7 +33,9 @@ hpack==4.0.0
 httpcore==1.0.5
     # via httpx
 httpx[http2]==0.27.0
-    # via qdrant-client
+    # via
+    #   httpx
+    #   qdrant-client
 hyperframe==6.0.1
     # via h2
 idna==3.7
diff --git a/requirements/ingest/s3.txt b/requirements/ingest/s3.txt
index d5ad8f8589..2be658633e 100644
--- a/requirements/ingest/s3.txt
+++ b/requirements/ingest/s3.txt
@@ -6,7 +6,7 @@
 #
 aiobotocore==2.12.3
     # via s3fs
-aiohttp==3.9.4
+aiohttp==3.9.5
     # via
     #   aiobotocore
     #   s3fs
diff --git a/requirements/ingest/salesforce.txt b/requirements/ingest/salesforce.txt
index 2d8f69bd86..07e2598555 100644
--- a/requirements/ingest/salesforce.txt
+++ b/requirements/ingest/salesforce.txt
@@ -18,7 +18,7 @@ charset-normalizer==3.3.2
     #   -c ./ingest/../base.txt
     #   requests
 cryptography==42.0.5
-    # via simple-salesforce
+    # via pyjwt
 idna==3.7
     # via
     #   -c ./ingest/../base.txt
@@ -32,21 +32,14 @@ lxml==4.9.4
     #   zeep
 more-itertools==10.2.0
     # via simple-salesforce
-pendulum==3.0.0
-    # via simple-salesforce
 platformdirs==3.10.0
     # via
     #   -c ./ingest/../deps/constraints.txt
     #   zeep
 pycparser==2.22
     # via cffi
-pyjwt==2.8.0
+pyjwt[crypto]==2.8.0
     # via simple-salesforce
-python-dateutil==2.9.0.post0
-    # via
-    #   -c ./ingest/../base.txt
-    #   pendulum
-    #   time-machine
 pytz==2024.1
     # via zeep
 requests==2.31.0
@@ -60,17 +53,16 @@ requests-file==2.0.0
     # via zeep
 requests-toolbelt==1.0.0
     # via zeep
-simple-salesforce==1.12.5
+simple-salesforce==1.12.6
     # via -r ./ingest/salesforce.in
 six==1.16.0
     # via
     #   -c ./ingest/../base.txt
     #   isodate
-    #   python-dateutil
-time-machine==2.14.1
-    # via pendulum
-tzdata==2024.1
-    # via pendulum
+typing-extensions==4.11.0
+    # via
+    #   -c ./ingest/../base.txt
+    #   simple-salesforce
 urllib3==1.26.18
     # via
     #   -c ./ingest/../base.txt
diff --git a/requirements/ingest/sharepoint.txt b/requirements/ingest/sharepoint.txt
index 4eb8e6b15e..9167a159ed 100644
--- a/requirements/ingest/sharepoint.txt
+++ b/requirements/ingest/sharepoint.txt
@@ -34,7 +34,9 @@ office365-rest-python-client==2.4.2
 pycparser==2.22
     # via cffi
 pyjwt[crypto]==2.8.0
-    # via msal
+    # via
+    #   msal
+    #   pyjwt
 pytz==2024.1
     # via office365-rest-python-client
 requests==2.31.0
diff --git a/requirements/ingest/weaviate.txt b/requirements/ingest/weaviate.txt
index bebb5b6912..41a3bf3115 100644
--- a/requirements/ingest/weaviate.txt
+++ b/requirements/ingest/weaviate.txt
@@ -27,16 +27,16 @@ charset-normalizer==3.3.2
     #   requests
 cryptography==42.0.5
     # via authlib
-exceptiongroup==1.2.0
+exceptiongroup==1.2.1
     # via anyio
-grpcio==1.62.1
+grpcio==1.62.2
     # via
     #   grpcio-health-checking
     #   grpcio-tools
     #   weaviate-client
-grpcio-health-checking==1.62.1
+grpcio-health-checking==1.62.2
     # via weaviate-client
-grpcio-tools==1.62.1
+grpcio-tools==1.62.2
     # via weaviate-client
 h11==0.14.0
     # via httpcore
diff --git a/requirements/test.txt b/requirements/test.txt
index 5833f0e2bc..9034cd9818 100644
--- a/requirements/test.txt
+++ b/requirements/test.txt
@@ -10,7 +10,7 @@ appdirs==1.4.4
     # via label-studio-tools
 autoflake==2.3.1
     # via -r ./test.in
-black==24.3.0
+black==24.4.0
     # via -r ./test.in
 certifi==2024.2.2
     # via
@@ -29,7 +29,7 @@ coverage[toml]==7.4.4
     # via
     #   -r ./test.in
     #   pytest-cov
-exceptiongroup==1.2.0
+exceptiongroup==1.2.1
     # via pytest
 flake8==7.0.0
     # via
@@ -39,7 +39,7 @@ flake8-print==5.0.0
     # via -r ./test.in
 freezegun==1.4.0
     # via -r ./test.in
-grpcio==1.62.1
+grpcio==1.62.2
     # via -r ./test.in
 idna==3.7
     # via
@@ -81,7 +81,7 @@ platformdirs==3.10.0
     # via
     #   -c ././deps/constraints.txt
     #   black
-pluggy==1.4.0
+pluggy==1.5.0
     # via pytest
 pycodestyle==2.11.1
     # via
@@ -115,7 +115,7 @@ requests==2.31.0
     # via
     #   -c ./base.txt
     #   label-studio-sdk
-ruff==0.3.7
+ruff==0.4.1
     # via -r ./test.in
 six==1.16.0
     # via
diff --git a/unstructured/__version__.py b/unstructured/__version__.py
index d0b6ef94a7..10b9b96d79 100644
--- a/unstructured/__version__.py
+++ b/unstructured/__version__.py
@@ -1 +1 @@
-__version__ = "0.13.3-dev9"  # pragma: no cover
+__version__ = "0.13.3"  # pragma: no cover

From 05ff9750813b1e18121a66143ef274ac271358b0 Mon Sep 17 00:00:00 2001
From: Steve Canny <stcanny@gmail.com>
Date: Mon, 22 Apr 2024 16:58:17 -0700
Subject: [PATCH 2/2] fix: remove unused `ElementMetadata.section` (#2921)

**Summary**
The `.section` field in `ElementMetadata` is dead code, possibly a
remainder from a prior iteration of `partition_epub()`. In any case, it
is not populated by any partitioner. Remove it and any code that uses
it.
---
 CHANGELOG.md                                  | 10 +++
 docs/source/core/chunking.rst                 |  7 ---
 test_unstructured/chunking/test_base.py       | 63 -------------------
 test_unstructured/chunking/test_title.py      | 37 -----------
 test_unstructured/partition/epub/test_epub.py |  2 -
 test_unstructured/staging/test_base.py        |  3 -
 unstructured/__version__.py                   |  2 +-
 unstructured/chunking/base.py                 | 45 -------------
 unstructured/chunking/title.py                |  2 -
 unstructured/documents/elements.py            | 18 +++---
 unstructured/partition/common.py              |  2 -
 11 files changed, 21 insertions(+), 170 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ab0f43591b..c484e9eca1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,13 @@
+## 0.13.4-dev0
+
+### Enhancements
+
+### Features
+
+### Fixes
+
+* **Remove ElementMetadata.section field.** This field was unused, not populated by any partitioners.
+
 ## 0.13.3
 
 ### Enhancements
diff --git a/docs/source/core/chunking.rst b/docs/source/core/chunking.rst
index ee23620d59..7a9bfef11e 100644
--- a/docs/source/core/chunking.rst
+++ b/docs/source/core/chunking.rst
@@ -152,13 +152,6 @@ following behaviors:
   ``Title`` element would fit in the prior chunk. This implements the first aspect of the "preserve
   section boundaries" contract.
 
-- **Detect metadata.section change.** An element with a new value in ``element.metadata.section`` is
-  considered to start a new section. When a change in this value is encountered a new chunk is
-  started. This implements the second aspect of preserving section boundaries. This metadata is not
-  present in all document formats so is not used alone. An element having ``None`` for this metadata
-  field is considered to be part of the prior section; a section break is only detected on an
-  explicit change in value.
-
 - **Respect page boundaries.** Page boundaries can optionally also be respected using the
   ``multipage_sections`` argument. This defaults to ``True`` meaning that a page break does *not*
   start a new chunk. Setting this to ``False`` will separate elements that occur on different pages
diff --git a/test_unstructured/chunking/test_base.py b/test_unstructured/chunking/test_base.py
index dd9a68fa78..ab90d38d18 100644
--- a/test_unstructured/chunking/test_base.py
+++ b/test_unstructured/chunking/test_base.py
@@ -17,7 +17,6 @@
     TextPreChunk,
     TextPreChunkAccumulator,
     _TextSplitter,
-    is_in_next_section,
     is_on_next_page,
     is_title,
 )
@@ -1514,68 +1513,6 @@ def but_it_does_not_generate_a_TextPreChunk_on_flush_when_empty(self):
 # ================================================================================================
 
 
-class Describe_is_in_next_section:
-    """Unit-test suite for `unstructured.chunking.base.is_in_next_section()` function.
-
-    `is_in_next_section()` is not itself a predicate, rather it returns a predicate on Element
-    (`Callable[[Element], bool]`) that can be called repeatedly to detect section changes in an
-    element stream.
-    """
-
-    def it_is_false_for_the_first_element_when_it_has_a_non_None_section(self):
-        """This is an explicit first-section; first-section does not represent a section break."""
-        pred = is_in_next_section()
-        assert not pred(Text("abcd", metadata=ElementMetadata(section="Introduction")))
-
-    def and_it_is_false_for_the_first_element_when_it_has_a_None_section(self):
-        """This is an anonymous first-section; still doesn't represent a section break."""
-        pred = is_in_next_section()
-        assert not pred(Text("abcd"))
-
-    def it_is_false_for_None_section_elements_that_follow_an_explicit_first_section(self):
-        """A `None` section element is considered to continue the prior section."""
-        pred = is_in_next_section()
-        assert not pred(Text("abcd", metadata=ElementMetadata(section="Introduction")))
-        assert not pred(Text("efgh"))
-        assert not pred(Text("ijkl"))
-
-    def and_it_is_false_for_None_section_elements_that_follow_an_anonymous_first_section(self):
-        """A `None` section element is considered to continue the prior section."""
-        pred = is_in_next_section()
-        assert not pred(Text("abcd"))
-        assert not pred(Text("efgh"))
-        assert not pred(Text("ijkl"))
-
-    def it_is_false_for_matching_section_elements_that_follow_an_explicit_first_section(self):
-        pred = is_in_next_section()
-        assert not pred(Text("abcd", metadata=ElementMetadata(section="Introduction")))
-        assert not pred(Text("efgh", metadata=ElementMetadata(section="Introduction")))
-        assert not pred(Text("ijkl", metadata=ElementMetadata(section="Introduction")))
-
-    def it_is_true_for_an_explicit_section_element_that_follows_an_anonymous_first_section(self):
-        pred = is_in_next_section()
-        assert not pred(Text("abcd"))
-        assert not pred(Text("efgh"))
-        assert pred(Text("ijkl", metadata=ElementMetadata(section="Introduction")))
-
-    def and_it_is_true_for_a_different_explicit_section_that_follows_an_explicit_section(self):
-        pred = is_in_next_section()
-        assert not pred(Text("abcd", metadata=ElementMetadata(section="Introduction")))
-        assert pred(Text("efgh", metadata=ElementMetadata(section="Summary")))
-
-    def it_is_true_whenever_the_section_explicitly_changes_except_at_the_start(self):
-        pred = is_in_next_section()
-        assert not pred(Text("abcd"))
-        assert pred(Text("efgh", metadata=ElementMetadata(section="Introduction")))
-        assert not pred(Text("ijkl"))
-        assert not pred(Text("mnop", metadata=ElementMetadata(section="Introduction")))
-        assert not pred(Text("qrst"))
-        assert pred(Text("uvwx", metadata=ElementMetadata(section="Summary")))
-        assert not pred(Text("yzab", metadata=ElementMetadata(section="Summary")))
-        assert not pred(Text("cdef"))
-        assert pred(Text("ghij", metadata=ElementMetadata(section="Appendix")))
-
-
 class Describe_is_on_next_page:
     """Unit-test suite for `unstructured.chunking.base.is_on_next_page()` function.
 
diff --git a/test_unstructured/chunking/test_title.py b/test_unstructured/chunking/test_title.py
index be5b82e25a..7ffa652b06 100644
--- a/test_unstructured/chunking/test_title.py
+++ b/test_unstructured/chunking/test_title.py
@@ -139,43 +139,6 @@ def test_chunk_by_title():
     )
 
 
-def test_chunk_by_title_respects_section_change():
-    elements: list[Element] = [
-        Title("A Great Day", metadata=ElementMetadata(section="first")),
-        Text("Today is a great day.", metadata=ElementMetadata(section="second")),
-        Text("It is sunny outside.", metadata=ElementMetadata(section="second")),
-        Table("Heading\nCell text"),
-        Title("An Okay Day"),
-        Text("Today is an okay day."),
-        Text("It is rainy outside."),
-        Title("A Bad Day"),
-        Text(
-            "Today is a bad day.",
-            metadata=ElementMetadata(
-                regex_metadata={"a": [RegexMetadata(text="A", start=0, end=1)]},
-            ),
-        ),
-        Text("It is storming outside."),
-        CheckBox(),
-    ]
-
-    chunks = chunk_by_title(elements, combine_text_under_n_chars=0)
-
-    assert chunks == [
-        CompositeElement(
-            "A Great Day",
-        ),
-        CompositeElement(
-            "Today is a great day.\n\nIt is sunny outside.",
-        ),
-        Table("Heading\nCell text"),
-        CompositeElement("An Okay Day\n\nToday is an okay day.\n\nIt is rainy outside."),
-        CompositeElement(
-            "A Bad Day\n\nToday is a bad day.\n\nIt is storming outside.",
-        ),
-    ]
-
-
 def test_chunk_by_title_separates_by_page_number():
     elements: list[Element] = [
         Title("A Great Day", metadata=ElementMetadata(page_number=1)),
diff --git a/test_unstructured/partition/epub/test_epub.py b/test_unstructured/partition/epub/test_epub.py
index 5af52ac1c6..95eb68ae02 100644
--- a/test_unstructured/partition/epub/test_epub.py
+++ b/test_unstructured/partition/epub/test_epub.py
@@ -77,7 +77,6 @@ def test_partition_epub_from_filename_exclude_metadata():
     assert elements[0].metadata.filetype is None
     assert elements[0].metadata.page_name is None
     assert elements[0].metadata.filename is None
-    assert elements[0].metadata.section is None
 
 
 def test_partition_epub_from_file_exlcude_metadata():
@@ -87,7 +86,6 @@ def test_partition_epub_from_file_exlcude_metadata():
     assert elements[0].metadata.filetype is None
     assert elements[0].metadata.page_name is None
     assert elements[0].metadata.filename is None
-    assert elements[0].metadata.section is None
 
 
 def test_partition_epub_metadata_date(
diff --git a/test_unstructured/staging/test_base.py b/test_unstructured/staging/test_base.py
index 0aa0fa5ed9..105e1b4615 100644
--- a/test_unstructured/staging/test_base.py
+++ b/test_unstructured/staging/test_base.py
@@ -166,7 +166,6 @@ def test_default_pandas_dtypes():
             sent_from=["sent", "from"],
             sent_to=["sent", "to"],
             subject="subject",
-            section="section",
             header_footer_type="header_footer_type",
             emphasized_text_contents=["emphasized", "text", "contents"],
             emphasized_text_tags=["emphasized", "text", "tags"],
@@ -321,7 +320,6 @@ def test_convert_to_coco():
                 sent_from=["sent", "from"],
                 sent_to=["sent", "to"],
                 subject="subject",
-                section="section",
                 header_footer_type="header_footer_type",
                 emphasized_text_contents=["emphasized", "text", "contents"],
                 emphasized_text_tags=["emphasized", "text", "tags"],
@@ -366,7 +364,6 @@ def test_convert_to_coco():
                 sent_from=["sent", "from"],
                 sent_to=["sent", "to"],
                 subject="subject",
-                section="section",
                 header_footer_type="header_footer_type",
                 emphasized_text_contents=["emphasized", "text", "contents"],
                 emphasized_text_tags=["emphasized", "text", "tags"],
diff --git a/unstructured/__version__.py b/unstructured/__version__.py
index 10b9b96d79..927d266d2c 100644
--- a/unstructured/__version__.py
+++ b/unstructured/__version__.py
@@ -1 +1 @@
-__version__ = "0.13.3"  # pragma: no cover
+__version__ = "0.13.4-dev0"  # pragma: no cover
diff --git a/unstructured/chunking/base.py b/unstructured/chunking/base.py
index 106fe9b38e..edf37b3d2d 100644
--- a/unstructured/chunking/base.py
+++ b/unstructured/chunking/base.py
@@ -1022,51 +1022,6 @@ def will_fit(self, pre_chunk: TextPreChunk) -> bool:
 # ================================================================================================
 
 
-def is_in_next_section() -> BoundaryPredicate:
-    """Not a predicate itself, calling this returns a predicate that triggers on each new section.
-
-    The lifetime of the returned callable cannot extend beyond a single element-stream because it
-    stores current state (current section) that is particular to that element stream.
-
-    A "section" of this type is particular to the EPUB format (so far) and not to be confused with
-    a "section" composed of a section-heading (`Title` element) followed by content elements.
-
-    The returned predicate tracks the current section, starting at `None`. Calling with an element
-    with a different value for `metadata.section` returns True, indicating the element starts a new
-    section boundary, and updates the enclosed section name ready for the next transition.
-    """
-    current_section: Optional[str] = None
-    is_first: bool = True
-
-    def section_changed(element: Element) -> bool:
-        nonlocal current_section, is_first
-
-        section = element.metadata.section
-
-        # -- The first element never reports a section break, it starts the first section of the
-        # -- document. That section could be named (section is non-None) or anonymous (section is
-        # -- None). We don't really have to care.
-        if is_first:
-            current_section = section
-            is_first = False
-            return False
-
-        # -- An element with a `None` section is assumed to continue the current section. It never
-        # -- updates the current-section because once set, the current-section is "sticky" until
-        # -- replaced by another explicit section.
-        if section is None:
-            return False
-
-        # -- another element with the same section continues that section --
-        if section == current_section:
-            return False
-
-        current_section = section
-        return True
-
-    return section_changed
-
-
 def is_on_next_page() -> BoundaryPredicate:
     """Not a predicate itself, calling this returns a predicate that triggers on each new page.
 
diff --git a/unstructured/chunking/title.py b/unstructured/chunking/title.py
index e9f5e54e4d..878302301e 100644
--- a/unstructured/chunking/title.py
+++ b/unstructured/chunking/title.py
@@ -13,7 +13,6 @@
     ChunkingOptions,
     PreChunkCombiner,
     PreChunker,
-    is_in_next_section,
     is_on_next_page,
     is_title,
 )
@@ -121,7 +120,6 @@ def boundary_predicates(self) -> tuple[BoundaryPredicate, ...]:
 
         def iter_boundary_predicates() -> Iterator[BoundaryPredicate]:
             yield is_title
-            yield is_in_next_section()
             if not self.multipage_sections:
                 yield is_on_next_page()
 
diff --git a/unstructured/documents/elements.py b/unstructured/documents/elements.py
index d3783e9c4c..95778636ce 100644
--- a/unstructured/documents/elements.py
+++ b/unstructured/documents/elements.py
@@ -191,8 +191,6 @@ class ElementMetadata:
     parent_id: Optional[str]
     # -- "fields" e.g. status, dept.no, etc. extracted from text via regex --
     regex_metadata: Optional[dict[str, list[RegexMetadata]]]
-    # -- EPUB document section --
-    section: Optional[str]
 
     # -- e-mail specific metadata fields --
     sent_from: Optional[list[str]]
@@ -235,7 +233,6 @@ def __init__(
         page_number: Optional[int] = None,
         parent_id: Optional[str] = None,
         regex_metadata: Optional[dict[str, list[RegexMetadata]]] = None,
-        section: Optional[str] = None,
         sent_from: Optional[list[str]] = None,
         sent_to: Optional[list[str]] = None,
         signature: Optional[str] = None,
@@ -275,7 +272,6 @@ def __init__(
         self.page_number = page_number
         self.parent_id = parent_id
         self.regex_metadata = regex_metadata
-        self.section = section
         self.sent_from = sent_from
         self.sent_to = sent_to
         self.signature = signature
@@ -488,7 +484,6 @@ def field_consolidation_strategies(cls) -> dict[str, ConsolidationStrategy]:
             "page_number": cls.FIRST,
             "parent_id": cls.DROP,
             "regex_metadata": cls.REGEX,
-            "section": cls.FIRST,
             "sent_from": cls.FIRST,
             "sent_to": cls.FIRST,
             "signature": cls.FIRST,
@@ -671,7 +666,7 @@ def to_dict(cls):
 
 
 class Element(abc.ABC):
-    """An element is a section of a page in the document.
+    """An element is a semantically-coherent component of a document, often a paragraph.
 
     There are a few design principles that are followed when creating an element:
     1. It will always have an ID, which by default is a random UUID.
@@ -694,7 +689,9 @@ def __init__(
         metadata: Optional[ElementMetadata] = None,
         detection_origin: Optional[str] = None,
     ):
-        if element_id is not None and not isinstance(element_id, str):
+        if element_id is not None and not isinstance(
+            element_id, str
+        ):  # pyright: ignore[reportUnnecessaryIsInstance]
             raise ValueError("element_id must be of type str or None.")
 
         self._element_id = element_id
@@ -885,7 +882,12 @@ class Formula(Text):
 
 
 class CompositeElement(Text):
-    """A section of text consisting of a combination of elements."""
+    """A chunk formed from text (non-Table) elements.
+
+    Only produced by chunking. An instance may be formed by combining one or more sequential
+    elements produced by partitioning. It is also used when text-splitting an "oversized" element,
+    a single element that by itself is larger than the requested chunk size.
+    """
 
     category = "CompositeElement"
 
diff --git a/unstructured/partition/common.py b/unstructured/partition/common.py
index 5caff1897c..3e79573437 100644
--- a/unstructured/partition/common.py
+++ b/unstructured/partition/common.py
@@ -272,7 +272,6 @@ def add_element_metadata(
     text_as_html: Optional[str] = None,
     coordinates: Optional[tuple[tuple[float, float], ...]] = None,
     coordinate_system: Optional[CoordinateSystem] = None,
-    section: Optional[str] = None,
     image_path: Optional[str] = None,
     detection_origin: Optional[str] = None,
     languages: Optional[List[str]] = None,
@@ -324,7 +323,6 @@ def add_element_metadata(
         link_start_indexes=link_start_indexes,
         emphasized_text_contents=emphasized_text_contents,
         emphasized_text_tags=emphasized_text_tags,
-        section=section,
         category_depth=depth,
         image_path=image_path,
         languages=languages,