diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f1c944f0aa..332da61b00 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -16,7 +16,7 @@ jobs: setup: strategy: matrix: - python-version: ["3.8","3.9","3.10","3.11"] + python-version: ["3.9","3.10","3.11"] runs-on: ubuntu-latest env: NLTK_DATA: ${{ github.workspace }}/nltk_data @@ -30,7 +30,7 @@ jobs: check-deps: strategy: matrix: - python-version: ["3.8","3.9","3.10","3.11"] + python-version: ["3.9","3.10","3.11"] runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 @@ -44,7 +44,7 @@ jobs: lint: strategy: matrix: - python-version: ["3.8","3.9","3.10","3.11"] + python-version: ["3.9","3.10","3.11"] env: NLTK_DATA: ${{ github.workspace }}/nltk_data runs-on: ubuntu-latest @@ -84,7 +84,7 @@ jobs: test_unit: strategy: matrix: - python-version: ["3.8","3.9","3.10","3.11"] + python-version: ["3.9","3.10","3.11"] runs-on: ubuntu-latest env: NLTK_DATA: ${{ github.workspace }}/nltk_data @@ -150,7 +150,7 @@ jobs: test_unit_no_extras: strategy: matrix: - python-version: ["3.8"] + python-version: ["3.10"] runs-on: ubuntu-latest env: NLTK_DATA: ${{ github.workspace }}/nltk_data @@ -176,7 +176,7 @@ jobs: # NOTE(newelh) - Split extras into separate steps in the same pipeline (avoid using matrix) strategy: matrix: - python-version: ["3.8"] + python-version: ["3.10"] extra: ["csv", "docx", "odt", "markdown", "pypandoc", "msg", "pdf-image", "pptx", "xlsx"] runs-on: ubuntu-latest env: @@ -218,7 +218,7 @@ jobs: setup_ingest: strategy: matrix: - python-version: [ "3.8","3.9","3.10","3.11" ] + python-version: [ "3.9","3.10","3.11" ] runs-on: ubuntu-latest env: NLTK_DATA: ${{ github.workspace }}/nltk_data @@ -232,7 +232,7 @@ jobs: test_ingest_unit: strategy: matrix: - python-version: [ "3.8","3.9","3.10","3.11" ] + python-version: [ "3.9","3.10","3.11" ] runs-on: ubuntu-latest needs: [ setup_ingest, lint ] steps: @@ -258,7 +258,7 @@ jobs: test_ingest_src: strategy: matrix: - python-version: 
["3.8","3.9","3.10","3.11"] + python-version: ["3.9","3.10","3.11"] runs-on: ubuntu-latest-m env: NLTK_DATA: ${{ github.workspace }}/nltk_data @@ -338,7 +338,7 @@ jobs: environment: ci strategy: matrix: - python-version: ["3.8","3.9","3.10","3.11"] + python-version: ["3.9","3.10","3.11"] runs-on: ubuntu-latest-m env: NLTK_DATA: ${{ github.workspace }}/nltk_data @@ -394,8 +394,8 @@ jobs: test_unstructured_api_unit: strategy: matrix: - # NOTE(yuming): Unstructured API only use Python 3.8 - python-version: ["3.8"] + # NOTE(yuming): Unstructured API only uses Python 3.10 + python-version: ["3.10"] runs-on: ubuntu-latest env: NLTK_DATA: ${{ github.workspace }}/nltk_data diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index 0a09199fd1..3214db9e60 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -9,7 +9,7 @@ env: DOCKER_REPOSITORY: quay.io/unstructured-io/unstructured DOCKER_BUILD_REPOSITORY: quay.io/unstructured-io/build-unstructured PIP_VERSION: "23.2.1" - PYTHON_VERSION: "3.8" + PYTHON_VERSION: "3.10" jobs: set-short-sha: diff --git a/CHANGELOG.md b/CHANGELOG.md index 2e108ed328..20f5a50328 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +## 0.12.0 + +### Enhancements + +* **Drop support for python3.8** All dependencies are now built off of the minimum version of python being `3.9` + ## 0.11.9-dev4 ### Enhancements diff --git a/docker/ubuntu-22/Dockerfile b/docker/ubuntu-22/Dockerfile index 75e9fcf9aa..059bfc85bb 100644 --- a/docker/ubuntu-22/Dockerfile +++ b/docker/ubuntu-22/Dockerfile @@ -13,7 +13,7 @@ COPY Makefile Makefile SHELL ["/bin/bash", "-c"] -RUN source ~/.bashrc && pyenv virtualenv 3.8.17 unstructured && \ +RUN source ~/.bashrc && pyenv virtualenv 3.10 unstructured && \ source ~/.pyenv/versions/unstructured/bin/activate && \ make install-ci && \ make install-ingest-s3 && \ diff --git a/docs/requirements.txt b/docs/requirements.txt index 
ee5fdd1d2d..9e4529a720 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,12 +1,12 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.9 # by the following command: # # pip-compile --output-file=build.txt build.in # -alabaster==0.7.13 +alabaster==0.7.15 # via sphinx -babel==2.13.1 +babel==2.14.0 # via sphinx beautifulsoup4==4.12.2 # via @@ -36,7 +36,7 @@ idna==3.6 # requests imagesize==1.4.1 # via sphinx -importlib-metadata==7.0.0 +importlib-metadata==7.0.1 # via sphinx jinja2==3.1.2 # via @@ -63,8 +63,6 @@ pygments==2.17.2 # furo # sphinx # sphinx-tabs -pytz==2023.3.post1 - # via babel pyyaml==6.0.1 # via myst-parser requests==2.31.0 diff --git a/environment.yml b/environment.yml index fcaaed9515..43faedc787 100644 --- a/environment.yml +++ b/environment.yml @@ -7,7 +7,7 @@ channels: - pytorch dependencies: - - python=3.8 + - python=3.10 - pytorch=1.12.1 - pywin32 - poppler diff --git a/examples/argilla-summarization/README.md b/examples/argilla-summarization/README.md index 3829d339eb..f6240adf49 100644 --- a/examples/argilla-summarization/README.md +++ b/examples/argilla-summarization/README.md @@ -7,7 +7,7 @@ complete a data science project in hours that previously would have taken weeks. To get started, use the following steps: -- Ensure you have Python 3.8 or higher installed on your system +- Ensure you have Python 3.10 or higher installed on your system - Create a new Python virtual environment - Run `pip install -r requirements.txt` to install the dependencies - Run `PYTHONPATH=. 
jupyter notebook` from this directory to launch the notebook diff --git a/examples/argilla-summarization/requirements.txt b/examples/argilla-summarization/requirements.txt index cdeb77392e..8962bab8ed 100644 --- a/examples/argilla-summarization/requirements.txt +++ b/examples/argilla-summarization/requirements.txt @@ -1,290 +1,330 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.10 # by the following command: # # pip-compile # aiofiles==22.1.0 # via argilla -aiohttp==3.8.4 +aiohttp==3.9.1 # via # datasets + # elasticsearch8 # fsspec aiosignal==1.3.1 # via aiohttp -anyio==3.6.2 +aiosqlite==0.19.0 + # via argilla +alembic==1.9.4 + # via argilla +anyio==4.2.0 # via # httpcore # jupyter-server # starlette # watchfiles appnope==0.1.3 - # via - # ipykernel - # ipython -argilla[server]==1.3.1 - # via - # -r requirements.in - # unstructured -argon2-cffi==21.3.0 - # via - # jupyter-server - # nbclassic - # notebook + # via ipykernel +argilla[server]==1.21.0 + # via -r requirements.in +argon2-cffi==23.1.0 + # via jupyter-server argon2-cffi-bindings==21.2.0 # via argon2-cffi -arrow==1.2.3 +arrow==1.3.0 # via isoduration -asttokens==2.2.1 +asttokens==2.4.1 # via stack-data -async-timeout==4.0.2 +async-lru==2.0.4 + # via jupyterlab +async-timeout==4.0.3 # via aiohttp -attrs==22.2.0 +attrs==23.2.0 # via # aiohttp # jsonschema -backcall==0.2.0 - # via ipython + # referencing +babel==2.14.0 + # via jupyterlab-server backoff==1.11.1 # via # argilla # segment-analytics-python -bcrypt==4.0.1 + # unstructured +bcrypt==4.1.2 # via passlib -beautifulsoup4==4.11.2 - # via nbconvert -bleach==6.0.0 +beautifulsoup4==4.12.2 + # via + # nbconvert + # unstructured +bleach==6.1.0 # via nbconvert -brotli==1.0.9 +brotli==1.1.0 # via brotli-asgi brotli-asgi==1.2.0 # via argilla -certifi==2022.12.7 +certifi==2023.11.17 # via # elastic-transport # httpcore # httpx # opensearch-py # requests - # unstructured -cffi==1.15.1 + # 
unstructured-client +cffi==1.16.0 # via # argon2-cffi-bindings # cryptography -charset-normalizer==3.0.1 +chardet==5.2.0 + # via unstructured +charset-normalizer==3.3.2 # via - # aiohttp # requests -click==8.1.3 + # unstructured-client +click==8.1.7 # via # nltk + # typer # uvicorn -comm==0.1.2 - # via ipykernel +comm==0.2.1 + # via + # ipykernel + # ipywidgets cryptography==41.0.2 # via # -r requirements.in # python-jose -datasets==2.10.0 +dataclasses-json==0.6.3 + # via + # unstructured + # unstructured-client +datasets==2.16.1 # via -r requirements.in -debugpy==1.6.6 +debugpy==1.8.0 # via ipykernel decorator==5.1.1 # via ipython defusedxml==0.7.1 # via nbconvert -deprecated==1.2.13 +deprecated==1.2.14 # via argilla -dill==0.3.6 +dill==0.3.7 # via # datasets # multiprocess ecdsa==0.18.0 # via python-jose -elastic-transport==8.4.0 - # via elasticsearch -elasticsearch==8.5.3 +elastic-transport==8.11.0 + # via elasticsearch8 +elasticsearch8[async]==8.7.0 # via argilla -et-xmlfile==1.1.0 - # via openpyxl -executing==1.2.0 +emoji==2.9.0 + # via unstructured +exceptiongroup==1.2.0 + # via + # anyio + # ipython +executing==2.0.1 # via stack-data -fastapi==0.88.0 +fastapi==0.108.0 # via argilla -fastjsonschema==2.16.3 +fastjsonschema==2.19.1 # via nbformat -filelock==3.9.0 +filelock==3.13.1 # via + # datasets # huggingface-hub + # torch # transformers +filetype==1.2.0 + # via unstructured fqdn==1.5.1 # via jsonschema -frozenlist==1.3.3 +frozenlist==1.4.1 # via # aiohttp # aiosignal -fsspec[http]==2023.1.0 - # via datasets +fsspec[http]==2023.10.0 + # via + # datasets + # huggingface-hub + # torch +greenlet==3.0.3 + # via argilla h11==0.14.0 # via # httpcore # uvicorn -httpcore==0.16.3 +httpcore==0.18.0 # via httpx -httptools==0.5.0 +httptools==0.6.1 # via uvicorn -httpx==0.23.3 +httpx==0.25.0 # via argilla -huggingface-hub==0.12.1 +huggingface-hub==0.20.2 # via # datasets + # tokenizers # transformers -idna==3.4 +idna==3.6 # via # anyio + # httpx # jsonschema # requests 
- # rfc3986 + # unstructured-client # yarl -importlib-metadata==6.0.0 - # via - # jupyter-client - # nbconvert -importlib-resources==5.12.0 - # via jsonschema -ipykernel==6.21.2 +ipykernel==6.28.0 # via - # ipywidgets # jupyter # jupyter-console - # nbclassic - # notebook + # jupyterlab # qtconsole -ipython==8.10.0 +ipython==8.20.0 # via # -r requirements.in # ipykernel # ipywidgets # jupyter-console -ipython-genutils==0.2.0 - # via - # nbclassic - # notebook - # qtconsole -ipywidgets==8.0.4 +ipywidgets==8.1.1 # via jupyter isoduration==20.11.0 # via jsonschema -jedi==0.18.2 +jedi==0.19.1 # via ipython jinja2==3.1.2 # via # jupyter-server - # nbclassic + # jupyterlab + # jupyterlab-server # nbconvert - # notebook -joblib==1.2.0 + # torch +joblib==1.3.2 # via # nltk # scikit-learn -jsonpointer==2.3 +json5==0.9.14 + # via jupyterlab-server +jsonpath-python==1.0.6 + # via unstructured-client +jsonpointer==2.4 # via jsonschema -jsonschema[format-nongpl]==4.17.3 +jsonschema[format-nongpl]==4.20.0 # via # jupyter-events + # jupyterlab-server # nbformat +jsonschema-specifications==2023.12.1 + # via jsonschema jupyter==1.0.0 # via -r requirements.in -jupyter-client==8.0.3 +jupyter-client==8.6.0 # via # ipykernel # jupyter-console # jupyter-server - # nbclassic # nbclient - # notebook # qtconsole -jupyter-console==6.6.1 +jupyter-console==6.6.3 # via jupyter -jupyter-core==5.2.0 +jupyter-core==5.7.1 # via # ipykernel # jupyter-client # jupyter-console # jupyter-server - # nbclassic + # jupyterlab # nbclient # nbconvert # nbformat - # notebook # qtconsole -jupyter-events==0.6.3 +jupyter-events==0.9.0 # via jupyter-server -jupyter-server==2.3.0 +jupyter-lsp==2.2.1 + # via jupyterlab +jupyter-server==2.12.2 # via - # nbclassic + # jupyter-lsp + # jupyterlab + # jupyterlab-server + # notebook # notebook-shim -jupyter-server-terminals==0.4.4 +jupyter-server-terminals==0.5.1 # via jupyter-server -jupyterlab-pygments==0.2.2 +jupyterlab==4.0.10 + # via notebook 
+jupyterlab-pygments==0.3.0 # via nbconvert -jupyterlab-widgets==3.0.5 - # via ipywidgets -luqum==0.12.1 - # via argilla -lxml==4.9.2 +jupyterlab-server==2.25.2 # via - # python-docx - # python-pptx - # unstructured -markupsafe==2.1.2 + # jupyterlab + # notebook +jupyterlab-widgets==3.0.9 + # via ipywidgets +langdetect==1.0.9 + # via unstructured +lxml==5.1.0 + # via unstructured +mako==1.3.0 + # via alembic +markdown-it-py==3.0.0 + # via rich +markupsafe==2.1.3 # via # jinja2 + # mako # nbconvert +marshmallow==3.20.1 + # via + # dataclasses-json + # unstructured-client matplotlib-inline==0.1.6 # via # ipykernel # ipython -mistune==2.0.5 +mdurl==0.1.2 + # via markdown-it-py +mistune==3.0.2 # via nbconvert monotonic==1.6 # via # argilla # segment-analytics-python +mpmath==1.3.0 + # via sympy multidict==6.0.4 # via # aiohttp # yarl -multiprocess==0.70.14 +multiprocess==0.70.15 # via datasets -nbclassic==0.5.2 - # via notebook -nbclient==0.7.2 +mypy-extensions==1.0.0 + # via + # typing-inspect + # unstructured-client +nbclient==0.9.0 # via nbconvert -nbconvert==7.2.9 +nbconvert==7.14.0 # via # jupyter # jupyter-server - # nbclassic - # notebook -nbformat==5.7.3 +nbformat==5.9.2 # via # jupyter-server - # nbclassic # nbclient # nbconvert - # notebook -nest-asyncio==1.5.6 - # via - # ipykernel - # nbclassic - # notebook +nest-asyncio==1.5.8 + # via ipykernel +networkx==3.2.1 + # via torch nltk==3.8.1 # via unstructured -notebook==6.5.2 +notebook==7.0.6 # via jupyter -notebook-shim==0.2.2 - # via nbclassic +notebook-shim==0.2.3 + # via + # jupyterlab + # notebook numpy==1.23.5 # via # argilla @@ -294,55 +334,47 @@ numpy==1.23.5 # scikit-learn # scipy # transformers -openpyxl==3.1.1 - # via unstructured + # unstructured opensearch-py==2.0.1 # via argilla -packaging==23.0 +overrides==7.4.0 + # via jupyter-server +packaging==23.2 # via # argilla # datasets # huggingface-hub # ipykernel # jupyter-server + # jupyterlab + # jupyterlab-server + # marshmallow # nbconvert + # 
qtconsole # qtpy # transformers + # unstructured-client pandas==1.5.3 # via # argilla # datasets - # unstructured pandocfilters==1.5.0 # via nbconvert parso==0.8.3 # via jedi passlib[bcrypt]==1.7.4 # via argilla -pexpect==4.8.0 +pexpect==4.9.0 # via ipython -pickleshare==0.7.5 - # via ipython -pillow==9.4.0 - # via - # python-pptx - # unstructured -pkgutil-resolve-name==1.3.10 - # via jsonschema -platformdirs==3.0.0 +platformdirs==4.1.0 # via jupyter-core -ply==3.11 - # via luqum -prometheus-client==0.16.0 - # via - # jupyter-server - # nbclassic - # notebook -prompt-toolkit==3.0.37 +prometheus-client==0.19.0 + # via jupyter-server +prompt-toolkit==3.0.43 # via # ipython # jupyter-console -psutil==5.9.4 +psutil==5.9.7 # via # argilla # ipykernel @@ -352,49 +384,49 @@ ptyprocess==0.7.0 # terminado pure-eval==0.2.2 # via stack-data -pyarrow==11.0.0 +pyarrow==14.0.2 # via datasets -pyasn1==0.4.8 +pyarrow-hotfix==0.6 + # via datasets +pyasn1==0.5.1 # via # python-jose # rsa pycparser==2.21 # via cffi -pydantic==1.10.5 +pydantic==1.10.13 # via # argilla # fastapi -pygments==2.14.0 +pygments==2.17.2 # via # ipython # jupyter-console # nbconvert # qtconsole -pyrsistent==0.19.3 - # via jsonschema + # rich python-dateutil==2.8.2 # via # arrow # jupyter-client # pandas # segment-analytics-python -python-docx==0.8.11 - # via unstructured + # unstructured-client python-dotenv==1.0.0 # via uvicorn +python-iso639==2024.1.2 + # via unstructured python-jose[cryptography]==3.3.0 # via argilla python-json-logger==2.0.7 # via jupyter-events python-magic==0.4.27 # via unstructured -python-multipart==0.0.5 +python-multipart==0.0.6 # via argilla -python-pptx==0.6.21 - # via unstructured -pytz==2022.7.1 +pytz==2023.3.post1 # via pandas -pyyaml==6.0 +pyyaml==6.0.1 # via # argilla # datasets @@ -402,112 +434,128 @@ pyyaml==6.0 # jupyter-events # transformers # uvicorn -pyzmq==25.0.0 +pyzmq==25.1.2 # via # ipykernel # jupyter-client # jupyter-console # jupyter-server - # nbclassic - # 
notebook # qtconsole -qtconsole==5.4.0 +qtconsole==5.5.1 # via jupyter -qtpy==2.3.0 +qtpy==2.4.1 # via qtconsole -regex==2022.10.31 +rapidfuzz==3.6.1 + # via unstructured +referencing==0.32.1 + # via + # jsonschema + # jsonschema-specifications + # jupyter-events +regex==2023.12.25 # via # nltk # transformers -requests==2.28.2 +requests==2.31.0 # via # -r requirements.in # datasets # fsspec # huggingface-hub + # jupyterlab-server # opensearch-py - # responses # segment-analytics-python # transformers # unstructured -responses==0.18.0 - # via datasets + # unstructured-client rfc3339-validator==0.1.4 # via # jsonschema # jupyter-events -rfc3986[idna2008]==1.5.0 - # via httpx rfc3986-validator==0.1.1 # via # jsonschema # jupyter-events +rich==13.7.0 + # via argilla +rpds-py==0.16.2 + # via + # jsonschema + # referencing rsa==4.9 # via python-jose -scikit-learn==1.2.1 +safetensors==0.4.1 + # via transformers +scikit-learn==1.3.2 # via argilla -scipy==1.10.1 +scipy==1.11.4 # via scikit-learn segment-analytics-python==2.2.0 # via argilla -send2trash==1.8.0 - # via - # jupyter-server - # nbclassic - # notebook +send2trash==1.8.2 + # via jupyter-server six==1.16.0 # via # asttokens # bleach # ecdsa + # langdetect # python-dateutil - # python-multipart # rfc3339-validator -smart-open==6.3.0 + # unstructured-client +smart-open==6.4.0 # via argilla sniffio==1.3.0 # via # anyio # httpcore # httpx -soupsieve==2.4 +soupsieve==2.5 # via beautifulsoup4 -stack-data==0.6.2 +sqlalchemy==2.0.25 + # via + # alembic + # argilla +stack-data==0.6.3 # via ipython -starlette==0.22.0 +starlette==0.32.0.post1 # via # brotli-asgi # fastapi -terminado==0.17.1 +sympy==1.12 + # via torch +tabulate==0.9.0 + # via unstructured +terminado==0.18.0 # via # jupyter-server # jupyter-server-terminals - # nbclassic - # notebook -threadpoolctl==3.1.0 +threadpoolctl==3.2.0 # via scikit-learn tinycss2==1.2.1 # via nbconvert -tokenizers==0.13.2 +tokenizers==0.15.0 # via transformers -torch==1.13.1 
+tomli==2.0.1 + # via jupyterlab +torch==2.1.2 # via -r requirements.in -tornado==6.2 +tornado==6.4 # via # ipykernel # jupyter-client # jupyter-server - # nbclassic + # jupyterlab # notebook # terminado -tqdm==4.64.1 +tqdm==4.66.1 # via # argilla # datasets # huggingface-hub # nltk # transformers -traitlets==5.9.0 +traitlets==5.14.1 # via # comm # ipykernel @@ -518,62 +566,73 @@ traitlets==5.9.0 # jupyter-core # jupyter-events # jupyter-server + # jupyterlab # matplotlib-inline - # nbclassic # nbclient # nbconvert # nbformat - # notebook # qtconsole -transformers==4.26.1 +transformers==4.36.2 # via -r requirements.in -typing-extensions==4.5.0 +typer==0.9.0 + # via argilla +types-python-dateutil==2.8.19.20240106 + # via arrow +typing-extensions==4.9.0 # via + # anyio + # async-lru + # fastapi # huggingface-hub # pydantic - # starlette + # sqlalchemy # torch -unstructured==0.4.15 + # typer + # typing-inspect + # unstructured + # unstructured-client +typing-inspect==0.9.0 + # via + # dataclasses-json + # unstructured-client +unstructured==0.11.8 # via -r requirements.in -uri-template==1.2.0 +unstructured-client==0.15.2 + # via unstructured +uri-template==1.3.0 # via jsonschema -urllib3==1.26.14 +urllib3==1.26.18 # via # elastic-transport # opensearch-py # requests - # responses + # unstructured-client uvicorn[standard]==0.20.0 # via argilla -uvloop==0.17.0 +uvloop==0.19.0 # via uvicorn -watchfiles==0.18.1 +watchfiles==0.21.0 # via uvicorn -wcwidth==0.2.6 +wcwidth==0.2.13 # via prompt-toolkit -webcolors==1.12 +webcolors==1.13 # via jsonschema webencodings==0.5.1 # via # bleach # tinycss2 -websocket-client==1.5.1 +websocket-client==1.7.0 # via jupyter-server -websockets==10.4 +websockets==12.0 # via uvicorn -widgetsnbextension==4.0.5 +widgetsnbextension==4.0.9 # via ipywidgets wrapt==1.14.1 # via # argilla # deprecated -xlsxwriter==3.0.8 - # via python-pptx -xxhash==3.2.0 + # unstructured +xxhash==3.4.1 # via datasets -yarl==1.8.2 +yarl==1.9.4 # via aiohttp 
-zipp==3.15.0 - # via - # importlib-metadata - # importlib-resources diff --git a/examples/arxiv-topic-modelling/README.md b/examples/arxiv-topic-modelling/README.md index 3436557a44..2dcc084357 100644 --- a/examples/arxiv-topic-modelling/README.md +++ b/examples/arxiv-topic-modelling/README.md @@ -5,7 +5,7 @@ and several functions from the `unstructured` library to run topic modelling on To get started, use the following steps: -- Ensure you have Python 3.8 or higher installed on your system +- Ensure you have Python 3.10 or higher installed on your system - Create a new Python virtual environment - Run `pip install -r requirements.txt` to install the dependencies - Run `PYTHONPATH=. jupyter notebook` from this directory to launch the notebook diff --git a/examples/sec-sentiment-analysis/README.md b/examples/sec-sentiment-analysis/README.md index 28679b076e..2f244b3469 100644 --- a/examples/sec-sentiment-analysis/README.md +++ b/examples/sec-sentiment-analysis/README.md @@ -4,7 +4,7 @@ This directory contains an example of how to use the SEC API, the Unstructured S and several functions from the `unstructured` library to train a sentiment analysis model for the risk factors section of S-1 filings. To get started, use the following steps: -- Ensure you have Python 3.8 or higher installed on your system +- Ensure you have Python 3.10 or higher installed on your system - Create a new Python virtual environment - Run `pip install -r requirements.txt` to install the dependencies - Run `PYTHONPATH=. 
jupyter notebook` from this directory to launch the notebook diff --git a/requirements/base.in b/requirements/base.in index de413d123f..2659d5dac0 100644 --- a/requirements/base.in +++ b/requirements/base.in @@ -1,4 +1,4 @@ --c "constraints.in" +-c constraints.in chardet filetype python-magic diff --git a/requirements/base.txt b/requirements/base.txt index 9b0cea307d..05243d69e1 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.9 # by the following command: # # pip-compile --output-file=base.txt base.in @@ -25,7 +25,7 @@ dataclasses-json==0.6.3 # via # -r base.in # unstructured-client -emoji==2.8.0 +emoji==2.9.0 # via -r base.in filetype==1.2.0 # via -r base.in @@ -39,7 +39,7 @@ jsonpath-python==1.0.6 # via unstructured-client langdetect==1.0.9 # via -r base.in -lxml==4.9.3 +lxml==5.1.0 # via -r base.in marshmallow==3.20.1 # via @@ -51,23 +51,21 @@ mypy-extensions==1.0.0 # unstructured-client nltk==3.8.1 # via -r base.in -numpy==1.24.4 - # via - # -c constraints.in - # -r base.in +numpy==1.26.3 + # via -r base.in packaging==23.2 # via # marshmallow # unstructured-client python-dateutil==2.8.2 # via unstructured-client -python-iso639==2023.6.15 +python-iso639==2024.1.2 # via -r base.in python-magic==0.4.27 # via -r base.in -rapidfuzz==3.5.2 +rapidfuzz==3.6.1 # via -r base.in -regex==2023.10.3 +regex==2023.12.25 # via nltk requests==2.31.0 # via @@ -84,7 +82,7 @@ tabulate==0.9.0 # via -r base.in tqdm==4.66.1 # via nltk -typing-extensions==4.8.0 +typing-extensions==4.9.0 # via # -r base.in # typing-inspect @@ -93,7 +91,7 @@ typing-inspect==0.9.0 # via # dataclasses-json # unstructured-client -unstructured-client==0.14.3 +unstructured-client==0.15.2 # via -r base.in urllib3==1.26.18 # via diff --git a/requirements/build.txt b/requirements/build.txt index ee5fdd1d2d..9e4529a720 100644 --- a/requirements/build.txt +++ 
b/requirements/build.txt @@ -1,12 +1,12 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.9 # by the following command: # # pip-compile --output-file=build.txt build.in # -alabaster==0.7.13 +alabaster==0.7.15 # via sphinx -babel==2.13.1 +babel==2.14.0 # via sphinx beautifulsoup4==4.12.2 # via @@ -36,7 +36,7 @@ idna==3.6 # requests imagesize==1.4.1 # via sphinx -importlib-metadata==7.0.0 +importlib-metadata==7.0.1 # via sphinx jinja2==3.1.2 # via @@ -63,8 +63,6 @@ pygments==2.17.2 # furo # sphinx # sphinx-tabs -pytz==2023.3.post1 - # via babel pyyaml==6.0.1 # via myst-parser requests==2.31.0 diff --git a/requirements/constraints.in b/requirements/constraints.in index 2afffaae3b..a88cacaa18 100644 --- a/requirements/constraints.in +++ b/requirements/constraints.in @@ -17,8 +17,6 @@ wheel>=0.38.1 certifi>=2023.7.22 # From pycocotools in local-inference pyparsing<3.1.0 -# NOTE(robinson) - Numpy dropped Python 3.8 support in 1.25.0 -numpy<1.25.0 scipy<1.11.0 IPython<8.13 # NOTE(alan) Pinned to avoid error that occurs with 2.4.3: @@ -38,9 +36,6 @@ unstructured.pytesseract>=0.3.12 weaviate-client>3.25.0 # Note(yuming) - pining to avoid conflict with paddle install matplotlib==3.7.2 -# NOTE(crag) - pin to available pandas for python 3.8 (at least in CI) -fsspec==2023.9.1 -pandas<2.0.4 # langchain limits anyio to below 4.0 anyio<4.0 # NOTE(crag): earlier versions fail in compilation step when pip installing the package diff --git a/requirements/dev.txt b/requirements/dev.txt index 0f880f65b3..6963df88cf 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.9 # by the following command: # # pip-compile --output-file=dev.txt dev.in @@ -22,11 +22,11 @@ asttokens==2.4.1 # via stack-data async-lru==2.0.4 # via jupyterlab -attrs==23.1.0 +attrs==23.2.0 # via # 
jsonschema # referencing -babel==2.13.1 +babel==2.14.0 # via jupyterlab-server backcall==0.2.0 # via ipython @@ -58,7 +58,7 @@ click==8.1.7 # -c base.txt # -c test.txt # pip-tools -comm==0.2.0 +comm==0.2.1 # via # ipykernel # ipywidgets @@ -68,7 +68,7 @@ decorator==5.1.1 # via ipython defusedxml==0.7.1 # via nbconvert -distlib==0.3.7 +distlib==0.3.8 # via virtualenv exceptiongroup==1.2.0 # via @@ -76,13 +76,13 @@ exceptiongroup==1.2.0 # anyio executing==2.0.1 # via stack-data -fastjsonschema==2.19.0 +fastjsonschema==2.19.1 # via nbformat filelock==3.13.1 # via virtualenv fqdn==1.5.1 # via jsonschema -identify==2.5.32 +identify==2.5.33 # via pre-commit idna==3.6 # via @@ -91,7 +91,7 @@ idna==3.6 # anyio # jsonschema # requests -importlib-metadata==7.0.0 +importlib-metadata==7.0.1 # via # build # jupyter-client @@ -99,12 +99,7 @@ importlib-metadata==7.0.0 # jupyterlab # jupyterlab-server # nbconvert -importlib-resources==6.1.1 - # via - # jsonschema - # jsonschema-specifications - # jupyterlab -ipykernel==6.27.1 +ipykernel==6.28.0 # via # jupyter # jupyter-console @@ -138,7 +133,7 @@ jsonschema[format-nongpl]==4.20.0 # jupyter-events # jupyterlab-server # nbformat -jsonschema-specifications==2023.11.2 +jsonschema-specifications==2023.12.1 # via jsonschema jupyter==1.0.0 # via -r dev.in @@ -151,7 +146,7 @@ jupyter-client==8.6.0 # qtconsole jupyter-console==6.6.3 # via jupyter -jupyter-core==5.5.0 +jupyter-core==5.7.1 # via # -c constraints.in # ipykernel @@ -167,16 +162,16 @@ jupyter-events==0.9.0 # via jupyter-server jupyter-lsp==2.2.1 # via jupyterlab -jupyter-server==2.11.2 +jupyter-server==2.12.2 # via # jupyter-lsp # jupyterlab # jupyterlab-server # notebook # notebook-shim -jupyter-server-terminals==0.4.4 +jupyter-server-terminals==0.5.1 # via jupyter-server -jupyterlab==4.0.9 +jupyterlab==4.0.10 # via notebook jupyterlab-pygments==0.3.0 # via nbconvert @@ -198,7 +193,7 @@ mistune==3.0.2 # via nbconvert nbclient==0.9.0 # via nbconvert -nbconvert==7.12.0 
+nbconvert==7.14.0 # via # jupyter # jupyter-server @@ -241,23 +236,21 @@ pickleshare==0.7.5 # via ipython pip-tools==7.3.0 # via -r dev.in -pkgutil-resolve-name==1.3.10 - # via jsonschema platformdirs==3.10.0 # via # -c constraints.in # -c test.txt # jupyter-core # virtualenv -pre-commit==3.5.0 +pre-commit==3.6.0 # via -r dev.in prometheus-client==0.19.0 # via jupyter-server -prompt-toolkit==3.0.41 +prompt-toolkit==3.0.43 # via # ipython # jupyter-console -psutil==5.9.6 +psutil==5.9.7 # via ipykernel ptyprocess==0.7.0 # via @@ -283,8 +276,6 @@ python-dateutil==2.8.2 # jupyter-client python-json-logger==2.0.7 # via jupyter-events -pytz==2023.3.post1 - # via babel pyyaml==6.0.1 # via # -c test.txt @@ -301,7 +292,7 @@ qtconsole==5.5.1 # via jupyter qtpy==2.4.1 # via qtconsole -referencing==0.31.1 +referencing==0.32.1 # via # jsonschema # jsonschema-specifications @@ -319,7 +310,7 @@ rfc3986-validator==0.1.1 # via # jsonschema # jupyter-events -rpds-py==0.13.2 +rpds-py==0.16.2 # via # jsonschema # referencing @@ -362,7 +353,7 @@ tornado==6.4 # jupyterlab # notebook # terminado -traitlets==5.14.0 +traitlets==5.14.1 # via # comm # ipykernel @@ -379,9 +370,9 @@ traitlets==5.14.0 # nbconvert # nbformat # qtconsole -types-python-dateutil==2.8.19.14 +types-python-dateutil==2.8.19.20240106 # via arrow -typing-extensions==4.8.0 +typing-extensions==4.9.0 # via # -c base.txt # -c test.txt @@ -397,7 +388,7 @@ urllib3==1.26.18 # requests virtualenv==20.25.0 # via pre-commit -wcwidth==0.2.12 +wcwidth==0.2.13 # via prompt-toolkit webcolors==1.13 # via jsonschema @@ -414,9 +405,7 @@ wheel==0.42.0 widgetsnbextension==4.0.9 # via ipywidgets zipp==3.17.0 - # via - # importlib-metadata - # importlib-resources + # via importlib-metadata # The following packages are considered to be unsafe in a requirements file: # pip diff --git a/requirements/extra-csv.txt b/requirements/extra-csv.txt index 98fcd6736e..db13c3239a 100644 --- a/requirements/extra-csv.txt +++ b/requirements/extra-csv.txt 
@@ -1,18 +1,15 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.9 # by the following command: # # pip-compile --output-file=extra-csv.txt extra-csv.in # -numpy==1.24.4 +numpy==1.26.3 # via # -c base.txt - # -c constraints.in # pandas -pandas==2.0.3 - # via - # -c constraints.in - # -r extra-csv.in +pandas==2.1.4 + # via -r extra-csv.in python-dateutil==2.8.2 # via # -c base.txt @@ -23,5 +20,5 @@ six==1.16.0 # via # -c base.txt # python-dateutil -tzdata==2023.3 +tzdata==2023.4 # via pandas diff --git a/requirements/extra-docx.in b/requirements/extra-docx.in index 3104a0dbec..46569e09c3 100644 --- a/requirements/extra-docx.in +++ b/requirements/extra-docx.in @@ -1,4 +1,4 @@ -c constraints.in -c base.txt -python-docx>=1.1.0 +python-docx diff --git a/requirements/extra-docx.txt b/requirements/extra-docx.txt index 527ad9e818..70c8f10b66 100644 --- a/requirements/extra-docx.txt +++ b/requirements/extra-docx.txt @@ -1,16 +1,16 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.9 # by the following command: # # pip-compile --output-file=extra-docx.txt extra-docx.in # -lxml==4.9.3 +lxml==5.1.0 # via # -c base.txt # python-docx python-docx==1.1.0 # via -r extra-docx.in -typing-extensions==4.8.0 +typing-extensions==4.9.0 # via # -c base.txt # python-docx diff --git a/requirements/extra-epub.txt b/requirements/extra-epub.txt index 9956e678c9..dd573ccead 100644 --- a/requirements/extra-epub.txt +++ b/requirements/extra-epub.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.9 # by the following command: # # pip-compile --output-file=extra-epub.txt extra-epub.in diff --git a/requirements/extra-markdown.in b/requirements/extra-markdown.in index 44e8174885..d3a514bc76 100644 --- a/requirements/extra-markdown.in +++ 
b/requirements/extra-markdown.in @@ -1,4 +1,4 @@ --c "constraints.in" --c "base.txt" +-c constraints.in +-c base.txt markdown diff --git a/requirements/extra-markdown.txt b/requirements/extra-markdown.txt index c2c30d59a0..216485fba5 100644 --- a/requirements/extra-markdown.txt +++ b/requirements/extra-markdown.txt @@ -1,10 +1,10 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.9 # by the following command: # # pip-compile --output-file=extra-markdown.txt extra-markdown.in # -importlib-metadata==7.0.0 +importlib-metadata==7.0.1 # via markdown markdown==3.5.1 # via -r extra-markdown.in diff --git a/requirements/extra-msg.txt b/requirements/extra-msg.txt index 0b4a6bfb15..fa20ef5cfa 100644 --- a/requirements/extra-msg.txt +++ b/requirements/extra-msg.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.9 # by the following command: # # pip-compile --output-file=extra-msg.txt extra-msg.in diff --git a/requirements/extra-odt.in b/requirements/extra-odt.in index a2bd500975..6076a76a1f 100644 --- a/requirements/extra-odt.in +++ b/requirements/extra-odt.in @@ -1,5 +1,5 @@ -c constraints.in -c base.txt -python-docx>=1.1.0 +python-docx pypandoc diff --git a/requirements/extra-odt.txt b/requirements/extra-odt.txt index cc2929524e..4cb48d6013 100644 --- a/requirements/extra-odt.txt +++ b/requirements/extra-odt.txt @@ -1,10 +1,10 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.9 # by the following command: # # pip-compile --output-file=extra-odt.txt extra-odt.in # -lxml==4.9.3 +lxml==5.1.0 # via # -c base.txt # python-docx @@ -12,7 +12,7 @@ pypandoc==1.12 # via -r extra-odt.in python-docx==1.1.0 # via -r extra-odt.in -typing-extensions==4.8.0 +typing-extensions==4.9.0 # via # -c base.txt # python-docx diff --git 
a/requirements/extra-paddleocr.txt b/requirements/extra-paddleocr.txt index f9b3ba0e9f..9c7ffe886f 100644 --- a/requirements/extra-paddleocr.txt +++ b/requirements/extra-paddleocr.txt @@ -1,14 +1,14 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.9 # by the following command: # # pip-compile --output-file=extra-paddleocr.txt extra-paddleocr.in # attrdict==2.0.1 # via unstructured-paddleocr -babel==2.13.1 +babel==2.14.0 # via flask-babel -bce-python-sdk==0.8.97 +bce-python-sdk==0.8.99 # via visualdl blinker==1.7.0 # via flask @@ -27,7 +27,7 @@ click==8.1.7 # via # -c base.txt # flask -contourpy==1.1.1 +contourpy==1.2.0 # via matplotlib cssselect==1.2.0 # via premailer @@ -35,7 +35,7 @@ cssutils==2.9.0 # via premailer cycler==0.12.1 # via matplotlib -cython==3.0.6 +cython==3.0.7 # via unstructured-paddleocr et-xmlfile==1.1.0 # via openpyxl @@ -45,7 +45,7 @@ flask==3.0.0 # visualdl flask-babel==4.0.0 # via visualdl -fonttools==4.46.0 +fonttools==4.47.0 # via matplotlib future==0.18.3 # via bce-python-sdk @@ -53,13 +53,13 @@ idna==3.6 # via # -c base.txt # requests -imageio==2.33.0 +imageio==2.33.1 # via # imgaug # scikit-image imgaug==0.4.0 # via unstructured-paddleocr -importlib-metadata==7.0.0 +importlib-metadata==7.0.1 # via flask importlib-resources==6.1.1 # via matplotlib @@ -77,7 +77,7 @@ lazy-loader==0.3 # via scikit-image lmdb==1.4.1 # via unstructured-paddleocr -lxml==4.9.3 +lxml==5.1.0 # via # -c base.txt # premailer @@ -91,12 +91,11 @@ matplotlib==3.7.2 # -c constraints.in # imgaug # visualdl -networkx==3.1 +networkx==3.2.1 # via scikit-image -numpy==1.24.4 +numpy==1.26.3 # via # -c base.txt - # -c constraints.in # contourpy # imageio # imgaug @@ -104,7 +103,6 @@ numpy==1.24.4 # opencv-contrib-python # opencv-python # pandas - # pywavelets # scikit-image # scipy # shapely @@ -128,11 +126,9 @@ packaging==23.2 # matplotlib # scikit-image # visualdl -pandas==2.0.3 - # via - # -c 
constraints.in - # visualdl -pdf2image==1.16.3 +pandas==2.1.4 + # via visualdl +pdf2image==1.17.0 # via unstructured-paddleocr pillow==10.0.1 # via @@ -151,11 +147,11 @@ protobuf==4.23.4 # via # -c constraints.in # visualdl -psutil==5.9.6 +psutil==5.9.7 # via visualdl pyclipper==1.3.0.post5 # via unstructured-paddleocr -pycryptodome==3.19.0 +pycryptodome==3.19.1 # via bce-python-sdk pyparsing==3.0.9 # via @@ -168,12 +164,9 @@ python-dateutil==2.8.2 # pandas pytz==2023.3.post1 # via - # babel # flask-babel # pandas -pywavelets==1.4.1 - # via scikit-image -rapidfuzz==3.5.2 +rapidfuzz==3.6.1 # via # -c base.txt # unstructured-paddleocr @@ -184,7 +177,7 @@ requests==2.31.0 # -c base.txt # premailer # visualdl -scikit-image==0.21.0 +scikit-image==0.22.0 # via # imgaug # unstructured-paddleocr @@ -205,13 +198,13 @@ six==1.16.0 # imgaug # python-dateutil # visualdl -tifffile==2023.7.10 +tifffile==2023.12.9 # via scikit-image tqdm==4.66.1 # via # -c base.txt # unstructured-paddleocr -tzdata==2023.3 +tzdata==2023.4 # via pandas unstructured-paddleocr==2.6.1.3 # via -r extra-paddleocr.in diff --git a/requirements/extra-pandoc.txt b/requirements/extra-pandoc.txt index 5f18ff91b0..eb0cbff330 100644 --- a/requirements/extra-pandoc.txt +++ b/requirements/extra-pandoc.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.9 # by the following command: # # pip-compile --output-file=extra-pandoc.txt extra-pandoc.in diff --git a/requirements/extra-pdf-image.in b/requirements/extra-pdf-image.in index 4c9ef2938f..975c622b9e 100644 --- a/requirements/extra-pdf-image.in +++ b/requirements/extra-pdf-image.in @@ -6,7 +6,7 @@ pdf2image pdfminer.six pikepdf pypdf -# Do not move to contsraints.in, otherwise unstructured-inference will not be upgraded +# Do not move to constraints.in, otherwise unstructured-inference will not be upgraded # when unstructured library is. 
unstructured-inference==0.7.21 # unstructured fork of pytesseract that provides an interface to allow for multiple output formats diff --git a/requirements/extra-pdf-image.txt b/requirements/extra-pdf-image.txt index e4e717d18d..39c1ae4442 100644 --- a/requirements/extra-pdf-image.txt +++ b/requirements/extra-pdf-image.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.9 # by the following command: # # pip-compile --output-file=extra-pdf-image.txt extra-pdf-image.in @@ -20,7 +20,7 @@ charset-normalizer==3.3.2 # requests coloredlogs==15.0.1 # via onnxruntime -contourpy==1.1.1 +contourpy==1.2.0 # via matplotlib cryptography==41.0.7 # via pdfminer-six @@ -37,14 +37,13 @@ filelock==3.13.1 # transformers flatbuffers==23.5.26 # via onnxruntime -fonttools==4.46.0 +fonttools==4.47.0 # via matplotlib -fsspec==2023.9.1 +fsspec==2023.12.2 # via - # -c constraints.in # huggingface-hub # torch -huggingface-hub==0.19.4 +huggingface-hub==0.20.2 # via # timm # tokenizers @@ -66,7 +65,7 @@ kiwisolver==1.4.5 # via matplotlib layoutparser[layoutmodels,tesseract]==0.3.4 # via unstructured-inference -lxml==4.9.3 +lxml==5.1.0 # via # -c base.txt # pikepdf @@ -78,12 +77,11 @@ matplotlib==3.7.2 # pycocotools mpmath==1.3.0 # via sympy -networkx==3.1 +networkx==3.2.1 # via torch -numpy==1.24.4 +numpy==1.26.3 # via # -c base.txt - # -c constraints.in # contourpy # layoutparser # matplotlib @@ -120,11 +118,9 @@ packaging==23.2 # pytesseract # transformers # unstructured-pytesseract -pandas==2.0.3 - # via - # -c constraints.in - # layoutparser -pdf2image==1.16.3 +pandas==2.1.4 + # via layoutparser +pdf2image==1.17.0 # via # -r extra-pdf-image.in # layoutparser @@ -134,7 +130,7 @@ pdfminer-six==20221105 # pdfplumber pdfplumber==0.10.3 # via layoutparser -pikepdf==8.8.0 +pikepdf==8.11.0 # via -r extra-pdf-image.in pillow==10.0.1 # via @@ -164,9 +160,9 @@ pyparsing==3.0.9 # via # -c constraints.in # 
matplotlib -pypdf==3.17.1 +pypdf==3.17.4 # via -r extra-pdf-image.in -pypdfium2==4.24.0 +pypdfium2==4.25.0 # via pdfplumber pytesseract==0.3.10 # via layoutparser @@ -186,11 +182,11 @@ pyyaml==6.0.1 # omegaconf # timm # transformers -rapidfuzz==3.5.2 +rapidfuzz==3.6.1 # via # -c base.txt # unstructured-inference -regex==2023.10.3 +regex==2023.12.25 # via # -c base.txt # transformers @@ -221,14 +217,14 @@ timm==0.9.12 # via effdet tokenizers==0.15.0 # via transformers -torch==2.1.1 +torch==2.1.2 # via # -c constraints.in # effdet # layoutparser # timm # torchvision -torchvision==0.16.1 +torchvision==0.16.2 # via # effdet # layoutparser @@ -239,16 +235,16 @@ tqdm==4.66.1 # huggingface-hub # iopath # transformers -transformers==4.35.2 +transformers==4.36.2 # via unstructured-inference -typing-extensions==4.8.0 +typing-extensions==4.9.0 # via # -c base.txt # huggingface-hub # iopath # pypdf # torch -tzdata==2023.3 +tzdata==2023.4 # via pandas unstructured-inference==0.7.21 # via -r extra-pdf-image.in diff --git a/requirements/extra-pptx.in b/requirements/extra-pptx.in index f776ba7618..b5ab3e30d7 100644 --- a/requirements/extra-pptx.in +++ b/requirements/extra-pptx.in @@ -1,3 +1,3 @@ --c "constraints.in" +-c constraints.in python-pptx<=0.6.23 diff --git a/requirements/extra-pptx.txt b/requirements/extra-pptx.txt index 36f94c465c..ec9ac5b040 100644 --- a/requirements/extra-pptx.txt +++ b/requirements/extra-pptx.txt @@ -1,10 +1,10 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.9 # by the following command: # # pip-compile --output-file=extra-pptx.txt extra-pptx.in # -lxml==4.9.3 +lxml==5.1.0 # via python-pptx pillow==10.0.1 # via diff --git a/requirements/extra-xlsx.txt b/requirements/extra-xlsx.txt index de5e02dfc6..36a552b34b 100644 --- a/requirements/extra-xlsx.txt +++ b/requirements/extra-xlsx.txt @@ -1,24 +1,21 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file 
is autogenerated by pip-compile with Python 3.9 # by the following command: # # pip-compile --output-file=extra-xlsx.txt extra-xlsx.in # et-xmlfile==1.1.0 # via openpyxl -networkx==3.1 +networkx==3.2.1 # via -r extra-xlsx.in -numpy==1.24.4 +numpy==1.26.3 # via # -c base.txt - # -c constraints.in # pandas openpyxl==3.1.2 # via -r extra-xlsx.in -pandas==2.0.3 - # via - # -c constraints.in - # -r extra-xlsx.in +pandas==2.1.4 + # via -r extra-xlsx.in python-dateutil==2.8.2 # via # -c base.txt @@ -29,7 +26,7 @@ six==1.16.0 # via # -c base.txt # python-dateutil -tzdata==2023.3 +tzdata==2023.4 # via pandas xlrd==2.0.1 # via -r extra-xlsx.in diff --git a/requirements/huggingface.txt b/requirements/huggingface.txt index d9558c8fdb..47dc1a5f2b 100644 --- a/requirements/huggingface.txt +++ b/requirements/huggingface.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.9 # by the following command: # # pip-compile --output-file=huggingface.txt huggingface.in @@ -22,12 +22,11 @@ filelock==3.13.1 # huggingface-hub # torch # transformers -fsspec==2023.9.1 +fsspec==2023.12.2 # via - # -c constraints.in # huggingface-hub # torch -huggingface-hub==0.19.4 +huggingface-hub==0.20.2 # via # tokenizers # transformers @@ -49,12 +48,11 @@ markupsafe==2.1.3 # via jinja2 mpmath==1.3.0 # via sympy -networkx==3.1 +networkx==3.2.1 # via torch -numpy==1.24.4 +numpy==1.26.3 # via # -c base.txt - # -c constraints.in # transformers packaging==23.2 # via @@ -65,7 +63,7 @@ pyyaml==6.0.1 # via # huggingface-hub # transformers -regex==2023.10.3 +regex==2023.12.25 # via # -c base.txt # sacremoses @@ -91,7 +89,7 @@ sympy==1.12 # via torch tokenizers==0.15.0 # via transformers -torch==2.1.1 +torch==2.1.2 # via # -c constraints.in # -r huggingface.in @@ -101,9 +99,9 @@ tqdm==4.66.1 # huggingface-hub # sacremoses # transformers -transformers==4.35.2 +transformers==4.36.2 # via -r huggingface.in -typing-extensions==4.8.0 
+typing-extensions==4.9.0 # via # -c base.txt # huggingface-hub diff --git a/requirements/ingest/airtable.txt b/requirements/ingest/airtable.txt index d9145af2be..7239cd627b 100644 --- a/requirements/ingest/airtable.txt +++ b/requirements/ingest/airtable.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.9 # by the following command: # # pip-compile --output-file=ingest/airtable.txt ingest/airtable.in @@ -29,7 +29,7 @@ requests==2.31.0 # via # -c ingest/../base.txt # pyairtable -typing-extensions==4.8.0 +typing-extensions==4.9.0 # via # -c ingest/../base.txt # pyairtable diff --git a/requirements/ingest/azure-cognitive-search.txt b/requirements/ingest/azure-cognitive-search.txt index 2254f4eab6..55d747623e 100644 --- a/requirements/ingest/azure-cognitive-search.txt +++ b/requirements/ingest/azure-cognitive-search.txt @@ -1,12 +1,16 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.9 # by the following command: # # pip-compile --output-file=ingest/azure-cognitive-search.txt ingest/azure-cognitive-search.in # +anyio==3.7.1 + # via + # -c ingest/../constraints.in + # azure-core azure-common==1.1.28 # via azure-search-documents -azure-core==1.29.5 +azure-core==1.29.6 # via azure-search-documents azure-search-documents==11.4.0 # via -r ingest/azure-cognitive-search.in @@ -19,9 +23,12 @@ charset-normalizer==3.3.2 # via # -c ingest/../base.txt # requests +exceptiongroup==1.2.0 + # via anyio idna==3.6 # via # -c ingest/../base.txt + # anyio # requests isodate==0.6.1 # via azure-search-documents @@ -34,7 +41,9 @@ six==1.16.0 # -c ingest/../base.txt # azure-core # isodate -typing-extensions==4.8.0 +sniffio==1.3.0 + # via anyio +typing-extensions==4.9.0 # via # -c ingest/../base.txt # azure-core diff --git a/requirements/ingest/azure.in b/requirements/ingest/azure.in index f9713365b1..c31092d55e 100644 --- 
a/requirements/ingest/azure.in +++ b/requirements/ingest/azure.in @@ -1,4 +1,4 @@ -c ../constraints.in -c ../base.txt adlfs -fsspec==2023.9.1 +fsspec diff --git a/requirements/ingest/azure.txt b/requirements/ingest/azure.txt index 28ade9b0c7..6141ff00ae 100644 --- a/requirements/ingest/azure.txt +++ b/requirements/ingest/azure.txt @@ -1,20 +1,24 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.9 # by the following command: # # pip-compile --output-file=ingest/azure.txt ingest/azure.in # -adlfs==2023.10.0 +adlfs==2023.12.0 # via -r ingest/azure.in aiohttp==3.9.1 # via adlfs aiosignal==1.3.1 # via aiohttp +anyio==3.7.1 + # via + # -c ingest/../constraints.in + # azure-core async-timeout==4.0.3 # via aiohttp -attrs==23.1.0 +attrs==23.2.0 # via aiohttp -azure-core==1.29.5 +azure-core==1.29.6 # via # adlfs # azure-identity @@ -44,18 +48,20 @@ cryptography==41.0.7 # azure-storage-blob # msal # pyjwt -frozenlist==1.4.0 +exceptiongroup==1.2.0 + # via anyio +frozenlist==1.4.1 # via # aiohttp # aiosignal -fsspec==2023.9.1 +fsspec==2023.12.2 # via - # -c ingest/../constraints.in # -r ingest/azure.in # adlfs idna==3.6 # via # -c ingest/../base.txt + # anyio # requests # yarl isodate==0.6.1 @@ -65,12 +71,16 @@ msal==1.26.0 # azure-datalake-store # azure-identity # msal-extensions -msal-extensions==1.0.0 +msal-extensions==1.1.0 # via azure-identity multidict==6.0.4 # via # aiohttp # yarl +packaging==23.2 + # via + # -c ingest/../base.txt + # msal-extensions portalocker==2.8.2 # via msal-extensions pycparser==2.21 @@ -88,7 +98,9 @@ six==1.16.0 # -c ingest/../base.txt # azure-core # isodate -typing-extensions==4.8.0 +sniffio==1.3.0 + # via anyio +typing-extensions==4.9.0 # via # -c ingest/../base.txt # azure-core @@ -98,5 +110,5 @@ urllib3==1.26.18 # -c ingest/../base.txt # -c ingest/../constraints.in # requests -yarl==1.9.3 +yarl==1.9.4 # via aiohttp diff --git a/requirements/ingest/biomed.txt 
b/requirements/ingest/biomed.txt index 0d03c28c74..e8c415a3dd 100644 --- a/requirements/ingest/biomed.txt +++ b/requirements/ingest/biomed.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.9 # by the following command: # # pip-compile --output-file=ingest/biomed.txt ingest/biomed.in diff --git a/requirements/ingest/box.in b/requirements/ingest/box.in index 50cf0137ba..53414bbebc 100644 --- a/requirements/ingest/box.in +++ b/requirements/ingest/box.in @@ -1,4 +1,4 @@ -c ../constraints.in -c ../base.txt boxfs -fsspec==2023.9.1 +fsspec diff --git a/requirements/ingest/box.txt b/requirements/ingest/box.txt index 506a27c5de..bd4cb681c6 100644 --- a/requirements/ingest/box.txt +++ b/requirements/ingest/box.txt @@ -1,10 +1,10 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.9 # by the following command: # # pip-compile --output-file=ingest/box.txt ingest/box.in # -attrs==23.1.0 +attrs==23.2.0 # via boxsdk boxfs==0.2.1 # via -r ingest/box.in @@ -23,9 +23,8 @@ charset-normalizer==3.3.2 # requests cryptography==41.0.7 # via boxsdk -fsspec==2023.9.1 +fsspec==2023.12.2 # via - # -c ingest/../constraints.in # -r ingest/box.in # boxfs idna==3.6 diff --git a/requirements/ingest/chroma.txt b/requirements/ingest/chroma.txt index 55a573056f..2957e09409 100644 --- a/requirements/ingest/chroma.txt +++ b/requirements/ingest/chroma.txt @@ -7,7 +7,6 @@ anyio==3.7.1 # via # -c ingest/../constraints.in - # fastapi # starlette # watchfiles asgiref==3.7.2 @@ -18,7 +17,9 @@ backoff==2.2.1 # opentelemetry-exporter-otlp-proto-common # opentelemetry-exporter-otlp-proto-grpc # posthog -bcrypt==4.1.1 +bcrypt==4.1.2 + # via chromadb +build==1.0.3 # via chromadb cachetools==5.3.2 # via google-auth @@ -35,7 +36,7 @@ charset-normalizer==3.3.2 # requests chroma-hnswlib==0.7.3 # via chromadb -chromadb==0.4.18 +chromadb==0.4.22 # via -r 
ingest/chroma.in click==8.1.7 # via @@ -50,17 +51,15 @@ deprecated==1.2.14 # opentelemetry-exporter-otlp-proto-grpc exceptiongroup==1.2.0 # via anyio -fastapi==0.104.1 +fastapi==0.108.0 # via chromadb filelock==3.13.1 # via huggingface-hub flatbuffers==23.5.26 # via onnxruntime -fsspec==2023.9.1 - # via - # -c ingest/../constraints.in - # huggingface-hub -google-auth==2.25.2 +fsspec==2023.12.2 + # via huggingface-hub +google-auth==2.26.1 # via kubernetes googleapis-common-protos==1.62.0 # via opentelemetry-exporter-otlp-proto-grpc @@ -72,7 +71,7 @@ h11==0.14.0 # via uvicorn httptools==0.6.1 # via uvicorn -huggingface-hub==0.19.4 +huggingface-hub==0.20.2 # via tokenizers humanfriendly==10.0 # via coloredlogs @@ -82,21 +81,22 @@ idna==3.6 # anyio # requests importlib-metadata==6.11.0 - # via opentelemetry-api + # via + # build + # opentelemetry-api importlib-resources==6.1.1 # via chromadb -kubernetes==28.1.0 +kubernetes==29.0.0 # via chromadb -mmh3==4.0.1 +mmh3==4.1.0 # via chromadb monotonic==1.6 # via posthog mpmath==1.3.0 # via sympy -numpy==1.24.4 +numpy==1.26.3 # via # -c ingest/../base.txt - # -c ingest/../constraints.in # chroma-hnswlib # chromadb # onnxruntime @@ -108,7 +108,7 @@ onnxruntime==1.15.1 # via # -c ingest/../constraints.in # chromadb -opentelemetry-api==1.21.0 +opentelemetry-api==1.22.0 # via # chromadb # opentelemetry-exporter-otlp-proto-grpc @@ -116,32 +116,32 @@ opentelemetry-api==1.21.0 # opentelemetry-instrumentation-asgi # opentelemetry-instrumentation-fastapi # opentelemetry-sdk -opentelemetry-exporter-otlp-proto-common==1.21.0 +opentelemetry-exporter-otlp-proto-common==1.22.0 # via opentelemetry-exporter-otlp-proto-grpc -opentelemetry-exporter-otlp-proto-grpc==1.21.0 +opentelemetry-exporter-otlp-proto-grpc==1.22.0 # via chromadb -opentelemetry-instrumentation==0.42b0 +opentelemetry-instrumentation==0.43b0 # via # opentelemetry-instrumentation-asgi # opentelemetry-instrumentation-fastapi -opentelemetry-instrumentation-asgi==0.42b0 
+opentelemetry-instrumentation-asgi==0.43b0 # via opentelemetry-instrumentation-fastapi -opentelemetry-instrumentation-fastapi==0.42b0 +opentelemetry-instrumentation-fastapi==0.43b0 # via chromadb -opentelemetry-proto==1.21.0 +opentelemetry-proto==1.22.0 # via # opentelemetry-exporter-otlp-proto-common # opentelemetry-exporter-otlp-proto-grpc -opentelemetry-sdk==1.21.0 +opentelemetry-sdk==1.22.0 # via # chromadb # opentelemetry-exporter-otlp-proto-grpc -opentelemetry-semantic-conventions==0.42b0 +opentelemetry-semantic-conventions==0.43b0 # via # opentelemetry-instrumentation-asgi # opentelemetry-instrumentation-fastapi # opentelemetry-sdk -opentelemetry-util-http==0.42b0 +opentelemetry-util-http==0.43b0 # via # opentelemetry-instrumentation-asgi # opentelemetry-instrumentation-fastapi @@ -150,9 +150,10 @@ overrides==7.4.0 packaging==23.2 # via # -c ingest/../base.txt + # build # huggingface-hub # onnxruntime -posthog==3.1.0 +posthog==3.3.0 # via chromadb protobuf==4.23.4 # via @@ -160,7 +161,7 @@ protobuf==4.23.4 # googleapis-common-protos # onnxruntime # opentelemetry-proto -pulsar-client==3.3.0 +pulsar-client==3.4.0 # via chromadb pyasn1==0.5.1 # via @@ -175,6 +176,8 @@ pydantic==1.10.13 # fastapi pypika==0.48.9 # via chromadb +pyproject-hooks==1.0.0 + # via build python-dateutil==2.8.2 # via # -c ingest/../base.txt @@ -208,7 +211,7 @@ six==1.16.0 # python-dateutil sniffio==1.3.0 # via anyio -starlette==0.27.0 +starlette==0.32.0.post1 # via fastapi sympy==1.12 # via onnxruntime @@ -216,6 +219,10 @@ tenacity==8.2.3 # via chromadb tokenizers==0.15.0 # via chromadb +tomli==2.0.1 + # via + # build + # pyproject-hooks tqdm==4.66.1 # via # -c ingest/../base.txt @@ -223,7 +230,7 @@ tqdm==4.66.1 # huggingface-hub typer==0.9.0 # via chromadb -typing-extensions==4.8.0 +typing-extensions==4.9.0 # via # -c ingest/../base.txt # asgiref @@ -241,7 +248,7 @@ urllib3==1.26.18 # -c ingest/../constraints.in # kubernetes # requests -uvicorn[standard]==0.24.0.post1 
+uvicorn[standard]==0.25.0 # via chromadb uvloop==0.19.0 # via uvicorn diff --git a/requirements/ingest/confluence.txt b/requirements/ingest/confluence.txt index 4d37f5b9e2..356791f6fa 100644 --- a/requirements/ingest/confluence.txt +++ b/requirements/ingest/confluence.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.9 # by the following command: # # pip-compile --output-file=ingest/confluence.txt ingest/confluence.in diff --git a/requirements/ingest/delta-table.in b/requirements/ingest/delta-table.in index b49dab5d4f..4a2955eeda 100644 --- a/requirements/ingest/delta-table.in +++ b/requirements/ingest/delta-table.in @@ -1,4 +1,4 @@ -c ../constraints.in -c ../base.txt deltalake -fsspec==2023.9.1 +fsspec diff --git a/requirements/ingest/delta-table.txt b/requirements/ingest/delta-table.txt index c66e481e47..c14cf14791 100644 --- a/requirements/ingest/delta-table.txt +++ b/requirements/ingest/delta-table.txt @@ -1,19 +1,18 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.9 # by the following command: # # pip-compile --output-file=ingest/delta-table.txt ingest/delta-table.in # -deltalake==0.14.0 +deltalake==0.15.1 # via -r ingest/delta-table.in -fsspec==2023.9.1 - # via - # -c ingest/../constraints.in - # -r ingest/delta-table.in -numpy==1.24.4 +fsspec==2023.12.2 + # via -r ingest/delta-table.in +numpy==1.26.3 # via # -c ingest/../base.txt - # -c ingest/../constraints.in # pyarrow -pyarrow==14.0.1 +pyarrow==14.0.2 + # via deltalake +pyarrow-hotfix==0.6 # via deltalake diff --git a/requirements/ingest/discord.txt b/requirements/ingest/discord.txt index 19bcc99074..1152c66f13 100644 --- a/requirements/ingest/discord.txt +++ b/requirements/ingest/discord.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.9 # by the 
following command: # # pip-compile --output-file=ingest/discord.txt ingest/discord.in @@ -10,11 +10,11 @@ aiosignal==1.3.1 # via aiohttp async-timeout==4.0.3 # via aiohttp -attrs==23.1.0 +attrs==23.2.0 # via aiohttp discord-py==2.3.2 # via -r ingest/discord.in -frozenlist==1.4.0 +frozenlist==1.4.1 # via # aiohttp # aiosignal @@ -26,5 +26,5 @@ multidict==6.0.4 # via # aiohttp # yarl -yarl==1.9.3 +yarl==1.9.4 # via aiohttp diff --git a/requirements/ingest/dropbox.in b/requirements/ingest/dropbox.in index 5ada29919a..335dff3be6 100644 --- a/requirements/ingest/dropbox.in +++ b/requirements/ingest/dropbox.in @@ -1,4 +1,4 @@ -c ../constraints.in -c ../base.txt dropboxdrivefs -fsspec==2023.9.1 +fsspec diff --git a/requirements/ingest/dropbox.txt b/requirements/ingest/dropbox.txt index 3139a6c226..4b1e200279 100644 --- a/requirements/ingest/dropbox.txt +++ b/requirements/ingest/dropbox.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.9 # by the following command: # # pip-compile --output-file=ingest/dropbox.txt ingest/dropbox.in @@ -17,9 +17,8 @@ dropbox==11.36.2 # via dropboxdrivefs dropboxdrivefs==1.3.1 # via -r ingest/dropbox.in -fsspec==2023.9.1 +fsspec==2023.12.2 # via - # -c ingest/../constraints.in # -r ingest/dropbox.in # dropboxdrivefs idna==3.6 diff --git a/requirements/ingest/elasticsearch.txt b/requirements/ingest/elasticsearch.txt index b0ee61f3b0..184aa914f6 100644 --- a/requirements/ingest/elasticsearch.txt +++ b/requirements/ingest/elasticsearch.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.9 # by the following command: # # pip-compile --output-file=ingest/elasticsearch.txt ingest/elasticsearch.in @@ -9,9 +9,9 @@ certifi==2023.11.17 # -c ingest/../base.txt # -c ingest/../constraints.in # elastic-transport -elastic-transport==8.10.0 +elastic-transport==8.11.0 # via 
elasticsearch -elasticsearch==8.11.0 +elasticsearch==8.11.1 # via -r ingest/elasticsearch.in urllib3==1.26.18 # via diff --git a/requirements/ingest/embed-aws-bedrock.txt b/requirements/ingest/embed-aws-bedrock.txt index c8cdfde16b..6950030959 100644 --- a/requirements/ingest/embed-aws-bedrock.txt +++ b/requirements/ingest/embed-aws-bedrock.txt @@ -1,22 +1,24 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.9 # by the following command: # # pip-compile --output-file=ingest/embed-aws-bedrock.txt ingest/embed-aws-bedrock.in # aiohttp==3.9.1 - # via langchain + # via + # langchain + # langchain-community aiosignal==1.3.1 # via aiohttp anyio==3.7.1 # via # -c ingest/../constraints.in - # langchain + # langchain-core async-timeout==4.0.3 # via # aiohttp # langchain -attrs==23.1.0 +attrs==23.2.0 # via aiohttp boto3==1.28.17 # via @@ -40,14 +42,13 @@ dataclasses-json==0.6.3 # via # -c ingest/../base.txt # langchain + # langchain-community exceptiongroup==1.2.0 # via anyio -frozenlist==1.4.0 +frozenlist==1.4.1 # via # aiohttp # aiosignal -greenlet==3.0.1 - # via sqlalchemy idna==3.6 # via # -c ingest/../base.txt @@ -64,13 +65,18 @@ jsonpatch==1.33 # langchain-core jsonpointer==2.4 # via jsonpatch -langchain==0.0.345 +langchain==0.1.0 # via -r ingest/embed-aws-bedrock.in -langchain-core==0.0.9 +langchain-community==0.0.10 # via langchain -langsmith==0.0.69 +langchain-core==0.1.8 # via # langchain + # langchain-community +langsmith==0.0.78 + # via + # langchain + # langchain-community # langchain-core marshmallow==3.20.1 # via @@ -84,14 +90,15 @@ mypy-extensions==1.0.0 # via # -c ingest/../base.txt # typing-inspect -numpy==1.24.4 +numpy==1.26.3 # via # -c ingest/../base.txt - # -c ingest/../constraints.in # langchain + # langchain-community packaging==23.2 # via # -c ingest/../base.txt + # langchain-core # marshmallow pydantic==1.10.13 # via @@ -104,11 +111,16 @@ python-dateutil==2.8.2 # -c 
ingest/../base.txt # botocore pyyaml==6.0.1 - # via langchain + # via + # langchain + # langchain-community + # langchain-core requests==2.31.0 # via # -c ingest/../base.txt # langchain + # langchain-community + # langchain-core # langsmith s3transfer==0.6.2 # via boto3 @@ -118,13 +130,16 @@ six==1.16.0 # python-dateutil sniffio==1.3.0 # via anyio -sqlalchemy==2.0.23 - # via langchain +sqlalchemy==2.0.25 + # via + # langchain + # langchain-community tenacity==8.2.3 # via # langchain + # langchain-community # langchain-core -typing-extensions==4.8.0 +typing-extensions==4.9.0 # via # -c ingest/../base.txt # pydantic @@ -140,5 +155,5 @@ urllib3==1.26.18 # -c ingest/../constraints.in # botocore # requests -yarl==1.9.3 +yarl==1.9.4 # via aiohttp diff --git a/requirements/ingest/embed-huggingface.in b/requirements/ingest/embed-huggingface.in index 1df2bd4f67..67ad182d57 100644 --- a/requirements/ingest/embed-huggingface.in +++ b/requirements/ingest/embed-huggingface.in @@ -1,6 +1,5 @@ -c ../constraints.in -c ../base.txt - huggingface langchain sentence_transformers diff --git a/requirements/ingest/embed-huggingface.txt b/requirements/ingest/embed-huggingface.txt index 781ea27f3b..34b9f3e6bc 100644 --- a/requirements/ingest/embed-huggingface.txt +++ b/requirements/ingest/embed-huggingface.txt @@ -1,22 +1,24 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.9 # by the following command: # # pip-compile --output-file=ingest/embed-huggingface.txt ingest/embed-huggingface.in # aiohttp==3.9.1 - # via langchain + # via + # langchain + # langchain-community aiosignal==1.3.1 # via aiohttp anyio==3.7.1 # via # -c ingest/../constraints.in - # langchain + # langchain-core async-timeout==4.0.3 # via # aiohttp # langchain -attrs==23.1.0 +attrs==23.2.0 # via aiohttp certifi==2023.11.17 # via @@ -35,6 +37,7 @@ dataclasses-json==0.6.3 # via # -c ingest/../base.txt # langchain + # langchain-community 
exceptiongroup==1.2.0 # via anyio filelock==3.13.1 @@ -42,20 +45,17 @@ filelock==3.13.1 # huggingface-hub # torch # transformers -frozenlist==1.4.0 +frozenlist==1.4.1 # via # aiohttp # aiosignal -fsspec==2023.9.1 +fsspec==2023.12.2 # via - # -c ingest/../constraints.in # huggingface-hub # torch -greenlet==3.0.1 - # via sqlalchemy huggingface==0.0.1 # via -r ingest/embed-huggingface.in -huggingface-hub==0.19.4 +huggingface-hub==0.20.2 # via # sentence-transformers # tokenizers @@ -79,13 +79,18 @@ jsonpatch==1.33 # langchain-core jsonpointer==2.4 # via jsonpatch -langchain==0.0.345 +langchain==0.1.0 # via -r ingest/embed-huggingface.in -langchain-core==0.0.9 +langchain-community==0.0.10 # via langchain -langsmith==0.0.69 +langchain-core==0.1.8 + # via + # langchain + # langchain-community +langsmith==0.0.78 # via # langchain + # langchain-community # langchain-core markupsafe==2.1.3 # via jinja2 @@ -103,17 +108,17 @@ mypy-extensions==1.0.0 # via # -c ingest/../base.txt # typing-inspect -networkx==3.1 +networkx==3.2.1 # via torch nltk==3.8.1 # via # -c ingest/../base.txt # sentence-transformers -numpy==1.24.4 +numpy==1.26.3 # via # -c ingest/../base.txt - # -c ingest/../constraints.in # langchain + # langchain-community # scikit-learn # scipy # sentence-transformers @@ -123,6 +128,7 @@ packaging==23.2 # via # -c ingest/../base.txt # huggingface-hub + # langchain-core # marshmallow # transformers pillow==10.0.1 @@ -139,8 +145,10 @@ pyyaml==6.0.1 # via # huggingface-hub # langchain + # langchain-community + # langchain-core # transformers -regex==2023.10.3 +regex==2023.12.25 # via # -c ingest/../base.txt # nltk @@ -150,6 +158,8 @@ requests==2.31.0 # -c ingest/../base.txt # huggingface-hub # langchain + # langchain-community + # langchain-core # langsmith # torchvision # transformers @@ -170,24 +180,27 @@ sentencepiece==0.1.99 # via sentence-transformers sniffio==1.3.0 # via anyio -sqlalchemy==2.0.23 - # via langchain +sqlalchemy==2.0.25 + # via + # langchain + # 
langchain-community sympy==1.12 # via torch tenacity==8.2.3 # via # langchain + # langchain-community # langchain-core threadpoolctl==3.2.0 # via scikit-learn tokenizers==0.15.0 # via transformers -torch==2.1.1 +torch==2.1.2 # via # -c ingest/../constraints.in # sentence-transformers # torchvision -torchvision==0.16.1 +torchvision==0.16.2 # via sentence-transformers tqdm==4.66.1 # via @@ -196,9 +209,9 @@ tqdm==4.66.1 # nltk # sentence-transformers # transformers -transformers==4.35.2 +transformers==4.36.2 # via sentence-transformers -typing-extensions==4.8.0 +typing-extensions==4.9.0 # via # -c ingest/../base.txt # huggingface-hub @@ -215,5 +228,5 @@ urllib3==1.26.18 # -c ingest/../base.txt # -c ingest/../constraints.in # requests -yarl==1.9.3 +yarl==1.9.4 # via aiohttp diff --git a/requirements/ingest/embed-openai.txt b/requirements/ingest/embed-openai.txt index 74f3b199b2..e34bbb6c3f 100644 --- a/requirements/ingest/embed-openai.txt +++ b/requirements/ingest/embed-openai.txt @@ -1,24 +1,26 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.9 # by the following command: # # pip-compile --output-file=ingest/embed-openai.txt ingest/embed-openai.in # aiohttp==3.9.1 - # via langchain + # via + # langchain + # langchain-community aiosignal==1.3.1 # via aiohttp anyio==3.7.1 # via # -c ingest/../constraints.in # httpx - # langchain + # langchain-core # openai async-timeout==4.0.3 # via # aiohttp # langchain -attrs==23.1.0 +attrs==23.2.0 # via aiohttp certifi==2023.11.17 # via @@ -35,21 +37,20 @@ dataclasses-json==0.6.3 # via # -c ingest/../base.txt # langchain -distro==1.8.0 + # langchain-community +distro==1.9.0 # via openai exceptiongroup==1.2.0 # via anyio -frozenlist==1.4.0 +frozenlist==1.4.1 # via # aiohttp # aiosignal -greenlet==3.0.1 - # via sqlalchemy h11==0.14.0 # via httpcore httpcore==1.0.2 # via httpx -httpx==0.25.2 +httpx==0.26.0 # via openai idna==3.6 # via @@ -64,13 +65,18 @@ 
jsonpatch==1.33 # langchain-core jsonpointer==2.4 # via jsonpatch -langchain==0.0.345 +langchain==0.1.0 # via -r ingest/embed-openai.in -langchain-core==0.0.9 +langchain-community==0.0.10 # via langchain -langsmith==0.0.69 +langchain-core==0.1.8 # via # langchain + # langchain-community +langsmith==0.0.78 + # via + # langchain + # langchain-community # langchain-core marshmallow==3.20.1 # via @@ -84,16 +90,17 @@ mypy-extensions==1.0.0 # via # -c ingest/../base.txt # typing-inspect -numpy==1.24.4 +numpy==1.26.3 # via # -c ingest/../base.txt - # -c ingest/../constraints.in # langchain -openai==1.3.7 + # langchain-community +openai==1.7.0 # via -r ingest/embed-openai.in packaging==23.2 # via # -c ingest/../base.txt + # langchain-core # marshmallow pydantic==1.10.13 # via @@ -103,8 +110,11 @@ pydantic==1.10.13 # langsmith # openai pyyaml==6.0.1 - # via langchain -regex==2023.10.3 + # via + # langchain + # langchain-community + # langchain-core +regex==2023.12.25 # via # -c ingest/../base.txt # tiktoken @@ -112,6 +122,8 @@ requests==2.31.0 # via # -c ingest/../base.txt # langchain + # langchain-community + # langchain-core # langsmith # tiktoken sniffio==1.3.0 @@ -119,11 +131,14 @@ sniffio==1.3.0 # anyio # httpx # openai -sqlalchemy==2.0.23 - # via langchain +sqlalchemy==2.0.25 + # via + # langchain + # langchain-community tenacity==8.2.3 # via # langchain + # langchain-community # langchain-core tiktoken==0.5.2 # via -r ingest/embed-openai.in @@ -131,7 +146,7 @@ tqdm==4.66.1 # via # -c ingest/../base.txt # openai -typing-extensions==4.8.0 +typing-extensions==4.9.0 # via # -c ingest/../base.txt # openai @@ -147,5 +162,5 @@ urllib3==1.26.18 # -c ingest/../base.txt # -c ingest/../constraints.in # requests -yarl==1.9.3 +yarl==1.9.4 # via aiohttp diff --git a/requirements/ingest/gcs.in b/requirements/ingest/gcs.in index f7fe384fb5..20dded2ed0 100644 --- a/requirements/ingest/gcs.in +++ b/requirements/ingest/gcs.in @@ -1,5 +1,5 @@ -c ../constraints.in -c ../base.txt gcsfs 
-fsspec==2023.9.1 +fsspec bs4 diff --git a/requirements/ingest/gcs.txt b/requirements/ingest/gcs.txt index 171f91313f..d05f37bbda 100644 --- a/requirements/ingest/gcs.txt +++ b/requirements/ingest/gcs.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.9 # by the following command: # # pip-compile --output-file=ingest/gcs.txt ingest/gcs.in @@ -10,7 +10,7 @@ aiosignal==1.3.1 # via aiohttp async-timeout==4.0.3 # via aiohttp -attrs==23.1.0 +attrs==23.2.0 # via aiohttp beautifulsoup4==4.12.2 # via @@ -31,41 +31,40 @@ charset-normalizer==3.3.2 # requests decorator==5.1.1 # via gcsfs -frozenlist==1.4.0 +frozenlist==1.4.1 # via # aiohttp # aiosignal -fsspec==2023.9.1 +fsspec==2023.12.2 # via - # -c ingest/../constraints.in # -r ingest/gcs.in # gcsfs -gcsfs==2023.9.1 +gcsfs==2023.12.2.post1 # via -r ingest/gcs.in -google-api-core==2.14.0 +google-api-core==2.15.0 # via # google-cloud-core # google-cloud-storage -google-auth==2.24.0 +google-auth==2.26.1 # via # gcsfs # google-api-core # google-auth-oauthlib # google-cloud-core # google-cloud-storage -google-auth-oauthlib==1.1.0 +google-auth-oauthlib==1.2.0 # via gcsfs -google-cloud-core==2.3.3 +google-cloud-core==2.4.1 # via google-cloud-storage -google-cloud-storage==2.13.0 +google-cloud-storage==2.14.0 # via gcsfs google-crc32c==1.5.0 # via # google-cloud-storage # google-resumable-media -google-resumable-media==2.6.0 +google-resumable-media==2.7.0 # via google-cloud-storage -googleapis-common-protos==1.61.0 +googleapis-common-protos==1.62.0 # via google-api-core idna==3.6 # via @@ -109,5 +108,5 @@ urllib3==1.26.18 # -c ingest/../base.txt # -c ingest/../constraints.in # requests -yarl==1.9.3 +yarl==1.9.4 # via aiohttp diff --git a/requirements/ingest/github.txt b/requirements/ingest/github.txt index f39ad3cd32..cc44482931 100644 --- a/requirements/ingest/github.txt +++ b/requirements/ingest/github.txt @@ -1,5 +1,5 @@ # -# This file is 
autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.9 # by the following command: # # pip-compile --output-file=ingest/github.txt ingest/github.in @@ -45,7 +45,7 @@ six==1.16.0 # via # -c ingest/../base.txt # python-dateutil -typing-extensions==4.8.0 +typing-extensions==4.9.0 # via # -c ingest/../base.txt # pygithub diff --git a/requirements/ingest/gitlab.txt b/requirements/ingest/gitlab.txt index 485ab7e994..8c526840b0 100644 --- a/requirements/ingest/gitlab.txt +++ b/requirements/ingest/gitlab.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.9 # by the following command: # # pip-compile --output-file=ingest/gitlab.txt ingest/gitlab.in @@ -17,7 +17,7 @@ idna==3.6 # via # -c ingest/../base.txt # requests -python-gitlab==4.2.0 +python-gitlab==4.3.0 # via -r ingest/gitlab.in requests==2.31.0 # via diff --git a/requirements/ingest/google-drive.txt b/requirements/ingest/google-drive.txt index 49912a2083..4d717015c9 100644 --- a/requirements/ingest/google-drive.txt +++ b/requirements/ingest/google-drive.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.9 # by the following command: # # pip-compile --output-file=ingest/google-drive.txt ingest/google-drive.in @@ -15,18 +15,18 @@ charset-normalizer==3.3.2 # via # -c ingest/../base.txt # requests -google-api-core==2.14.0 +google-api-core==2.15.0 # via google-api-python-client -google-api-python-client==2.109.0 +google-api-python-client==2.113.0 # via -r ingest/google-drive.in -google-auth==2.24.0 +google-auth==2.26.1 # via # google-api-core # google-api-python-client # google-auth-httplib2 -google-auth-httplib2==0.1.1 +google-auth-httplib2==0.2.0 # via google-api-python-client -googleapis-common-protos==1.61.0 +googleapis-common-protos==1.62.0 # via google-api-core httplib2==0.22.0 # via 
diff --git a/requirements/ingest/hubspot.in b/requirements/ingest/hubspot.in index 3fd724416b..bc3d8530d4 100644 --- a/requirements/ingest/hubspot.in +++ b/requirements/ingest/hubspot.in @@ -1,2 +1,4 @@ +-c ../constraints.in +-c ../base.txt hubspot-api-client -urllib3>=1.26.17 \ No newline at end of file +urllib3 \ No newline at end of file diff --git a/requirements/ingest/hubspot.txt b/requirements/ingest/hubspot.txt index d67982d3d0..6489e63728 100644 --- a/requirements/ingest/hubspot.txt +++ b/requirements/ingest/hubspot.txt @@ -1,20 +1,28 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.9 # by the following command: # # pip-compile --output-file=ingest/hubspot.txt ingest/hubspot.in # certifi==2023.11.17 - # via hubspot-api-client -hubspot-api-client==8.1.1 + # via + # -c ingest/../base.txt + # -c ingest/../constraints.in + # hubspot-api-client +hubspot-api-client==8.2.0 # via -r ingest/hubspot.in python-dateutil==2.8.2 - # via hubspot-api-client + # via + # -c ingest/../base.txt + # hubspot-api-client six==1.16.0 # via + # -c ingest/../base.txt # hubspot-api-client # python-dateutil -urllib3==2.1.0 +urllib3==1.26.18 # via + # -c ingest/../base.txt + # -c ingest/../constraints.in # -r ingest/hubspot.in # hubspot-api-client diff --git a/requirements/ingest/jira.txt b/requirements/ingest/jira.txt index 0b77cb9945..22d0434403 100644 --- a/requirements/ingest/jira.txt +++ b/requirements/ingest/jira.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.9 # by the following command: # # pip-compile --output-file=ingest/jira.txt ingest/jira.in diff --git a/requirements/ingest/mongodb.txt b/requirements/ingest/mongodb.txt index d395eb0abb..09f450c2fa 100644 --- a/requirements/ingest/mongodb.txt +++ b/requirements/ingest/mongodb.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.8 
+# This file is autogenerated by pip-compile with Python 3.9 # by the following command: # # pip-compile --output-file=ingest/mongodb.txt ingest/mongodb.in diff --git a/requirements/ingest/notion.txt b/requirements/ingest/notion.txt index a8386848a3..b5b427659a 100644 --- a/requirements/ingest/notion.txt +++ b/requirements/ingest/notion.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.9 # by the following command: # # pip-compile --output-file=ingest/notion.txt ingest/notion.in @@ -22,14 +22,14 @@ htmlbuilder==1.0.0 # via -r ingest/notion.in httpcore==1.0.2 # via httpx -httpx==0.25.2 +httpx==0.26.0 # via notion-client idna==3.6 # via # -c ingest/../base.txt # anyio # httpx -notion-client==2.1.0 +notion-client==2.2.1 # via -r ingest/notion.in sniffio==1.3.0 # via diff --git a/requirements/ingest/onedrive.in b/requirements/ingest/onedrive.in index a2c1b04060..4544c257dc 100644 --- a/requirements/ingest/onedrive.in +++ b/requirements/ingest/onedrive.in @@ -1,5 +1,5 @@ -c ../constraints.in -c ../base.txt msal -Office365-REST-Python-Client<2.4.3 +Office365-REST-Python-Client bs4 diff --git a/requirements/ingest/onedrive.txt b/requirements/ingest/onedrive.txt index babcca6a7a..24ed30c80d 100644 --- a/requirements/ingest/onedrive.txt +++ b/requirements/ingest/onedrive.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.9 # by the following command: # # pip-compile --output-file=ingest/onedrive.txt ingest/onedrive.in diff --git a/requirements/ingest/outlook.in b/requirements/ingest/outlook.in index 0938277096..4cbace3e19 100644 --- a/requirements/ingest/outlook.in +++ b/requirements/ingest/outlook.in @@ -1,4 +1,4 @@ -c ../constraints.in -c ../base.txt msal -Office365-REST-Python-Client<2.4.3 +Office365-REST-Python-Client diff --git a/requirements/ingest/outlook.txt 
b/requirements/ingest/outlook.txt index 9d4c5b5312..feeaa71cc7 100644 --- a/requirements/ingest/outlook.txt +++ b/requirements/ingest/outlook.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.9 # by the following command: # # pip-compile --output-file=ingest/outlook.txt ingest/outlook.in diff --git a/requirements/ingest/pinecone.txt b/requirements/ingest/pinecone.txt index ffa46e75ce..c9f76f47de 100644 --- a/requirements/ingest/pinecone.txt +++ b/requirements/ingest/pinecone.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.9 # by the following command: # # pip-compile --output-file=ingest/pinecone.txt ingest/pinecone.in @@ -21,10 +21,9 @@ idna==3.6 # requests loguru==0.7.2 # via pinecone-client -numpy==1.24.4 +numpy==1.26.3 # via # -c ingest/../base.txt - # -c ingest/../constraints.in # pinecone-client pinecone-client==2.2.4 # via -r ingest/pinecone.in @@ -46,7 +45,7 @@ tqdm==4.66.1 # via # -c ingest/../base.txt # pinecone-client -typing-extensions==4.8.0 +typing-extensions==4.9.0 # via # -c ingest/../base.txt # pinecone-client diff --git a/requirements/ingest/postgres.txt b/requirements/ingest/postgres.txt index f80c5ab91b..c0870d65e4 100644 --- a/requirements/ingest/postgres.txt +++ b/requirements/ingest/postgres.txt @@ -1,8 +1,8 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.9 # by the following command: # -# pip-compile --constraint=requirements/constraints.in requirements/ingest/postgres.in +# pip-compile --output-file=ingest/postgres.txt ingest/postgres.in # psycopg2-binary==2.9.9 - # via -r requirements/ingest/sql.in + # via -r ingest/postgres.in diff --git a/requirements/ingest/qdrant.txt b/requirements/ingest/qdrant.txt index ce8be74cf0..8272522605 100644 --- a/requirements/ingest/qdrant.txt +++ 
b/requirements/ingest/qdrant.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.10 +# This file is autogenerated by pip-compile with Python 3.9 # by the following command: # # pip-compile --output-file=ingest/qdrant.txt ingest/qdrant.in @@ -39,10 +39,9 @@ idna==3.6 # -c ingest/../base.txt # anyio # httpx -numpy==1.24.4 +numpy==1.26.3 # via # -c ingest/../base.txt - # -c ingest/../constraints.in # qdrant-client portalocker==2.8.2 # via qdrant-client @@ -60,7 +59,7 @@ sniffio==1.3.0 # via # anyio # httpx -typing-extensions==4.8.0 +typing-extensions==4.9.0 # via # -c ingest/../base.txt # pydantic diff --git a/requirements/ingest/reddit.txt b/requirements/ingest/reddit.txt index 7c6e92c4b9..ad86340d76 100644 --- a/requirements/ingest/reddit.txt +++ b/requirements/ingest/reddit.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.9 # by the following command: # # pip-compile --output-file=ingest/reddit.txt ingest/reddit.in diff --git a/requirements/ingest/s3.in b/requirements/ingest/s3.in index f2a99bf143..e6559763ff 100644 --- a/requirements/ingest/s3.in +++ b/requirements/ingest/s3.in @@ -1,4 +1,4 @@ -c ../constraints.in -c ../base.txt s3fs -fsspec==2023.9.1 +fsspec diff --git a/requirements/ingest/s3.txt b/requirements/ingest/s3.txt index b878cb9a26..d686008a77 100644 --- a/requirements/ingest/s3.txt +++ b/requirements/ingest/s3.txt @@ -1,10 +1,10 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.9 # by the following command: # # pip-compile --output-file=ingest/s3.txt ingest/s3.in # -aiobotocore==2.5.4 +aiobotocore==2.7.0 # via s3fs aiohttp==3.9.1 # via @@ -16,19 +16,18 @@ aiosignal==1.3.1 # via aiohttp async-timeout==4.0.3 # via aiohttp -attrs==23.1.0 +attrs==23.2.0 # via aiohttp botocore==1.31.17 # via # -c ingest/../constraints.in # aiobotocore -frozenlist==1.4.0 
+frozenlist==1.4.1 # via # aiohttp # aiosignal -fsspec==2023.9.1 +fsspec==2023.12.2 # via - # -c ingest/../constraints.in # -r ingest/s3.in # s3fs idna==3.6 @@ -45,13 +44,13 @@ python-dateutil==2.8.2 # via # -c ingest/../base.txt # botocore -s3fs==2023.9.1 +s3fs==2023.12.2 # via -r ingest/s3.in six==1.16.0 # via # -c ingest/../base.txt # python-dateutil -typing-extensions==4.8.0 +typing-extensions==4.9.0 # via # -c ingest/../base.txt # aioitertools @@ -64,5 +63,5 @@ wrapt==1.16.0 # via # -c ingest/../base.txt # aiobotocore -yarl==1.9.3 +yarl==1.9.4 # via aiohttp diff --git a/requirements/ingest/salesforce.txt b/requirements/ingest/salesforce.txt index e6573fb69f..1abd1318e5 100644 --- a/requirements/ingest/salesforce.txt +++ b/requirements/ingest/salesforce.txt @@ -1,10 +1,10 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.9 # by the following command: # # pip-compile --output-file=ingest/salesforce.txt ingest/salesforce.in # -attrs==23.1.0 +attrs==23.2.0 # via zeep certifi==2023.11.17 # via @@ -25,13 +25,13 @@ idna==3.6 # requests isodate==0.6.1 # via zeep -lxml==4.9.3 +lxml==5.1.0 # via # -c ingest/../base.txt # zeep -more-itertools==10.1.0 +more-itertools==10.2.0 # via simple-salesforce -pendulum==2.1.2 +pendulum==3.0.0 # via simple-salesforce platformdirs==3.10.0 # via @@ -45,10 +45,9 @@ python-dateutil==2.8.2 # via # -c ingest/../base.txt # pendulum + # time-machine pytz==2023.3.post1 # via zeep -pytzdata==2020.1 - # via pendulum requests==2.31.0 # via # -c ingest/../base.txt @@ -68,6 +67,10 @@ six==1.16.0 # isodate # python-dateutil # requests-file +time-machine==2.13.0 + # via pendulum +tzdata==2023.4 + # via pendulum urllib3==1.26.18 # via # -c ingest/../base.txt diff --git a/requirements/ingest/sftp.txt b/requirements/ingest/sftp.txt index d63a0fca6a..d0db62d310 100644 --- a/requirements/ingest/sftp.txt +++ b/requirements/ingest/sftp.txt @@ -4,19 +4,17 @@ # # pip-compile 
--output-file=ingest/sftp.txt ingest/sftp.in # -bcrypt==4.0.1 +bcrypt==4.1.2 # via paramiko cffi==1.16.0 # via # cryptography # pynacl -cryptography==41.0.5 +cryptography==41.0.7 # via paramiko -fsspec==2023.9.1 - # via - # -c ingest/../constraints.in - # -r ingest/sftp.in -paramiko==3.3.1 +fsspec==2023.12.2 + # via -r ingest/sftp.in +paramiko==3.4.0 # via -r ingest/sftp.in pycparser==2.21 # via cffi diff --git a/requirements/ingest/sharepoint.in b/requirements/ingest/sharepoint.in index 0938277096..4cbace3e19 100644 --- a/requirements/ingest/sharepoint.in +++ b/requirements/ingest/sharepoint.in @@ -1,4 +1,4 @@ -c ../constraints.in -c ../base.txt msal -Office365-REST-Python-Client<2.4.3 +Office365-REST-Python-Client diff --git a/requirements/ingest/sharepoint.txt b/requirements/ingest/sharepoint.txt index 1196dfd580..2e869d8f82 100644 --- a/requirements/ingest/sharepoint.txt +++ b/requirements/ingest/sharepoint.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.9 # by the following command: # # pip-compile --output-file=ingest/sharepoint.txt ingest/sharepoint.in diff --git a/requirements/ingest/slack.txt b/requirements/ingest/slack.txt index 02a878985e..55f6d0e29f 100644 --- a/requirements/ingest/slack.txt +++ b/requirements/ingest/slack.txt @@ -1,8 +1,8 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.9 # by the following command: # # pip-compile --output-file=ingest/slack.txt ingest/slack.in # -slack-sdk==3.26.1 +slack-sdk==3.26.2 # via -r ingest/slack.in diff --git a/requirements/ingest/weaviate.txt b/requirements/ingest/weaviate.txt index 315aabd93e..f5253c5935 100644 --- a/requirements/ingest/weaviate.txt +++ b/requirements/ingest/weaviate.txt @@ -1,10 +1,10 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.9 # by the 
following command: # # pip-compile --output-file=ingest/weaviate.txt ingest/weaviate.in # -authlib==1.2.1 +authlib==1.3.0 # via weaviate-client certifi==2023.11.17 # via @@ -36,7 +36,7 @@ urllib3==1.26.18 # requests validators==0.22.0 # via weaviate-client -weaviate-client==3.25.3 +weaviate-client==3.26.0 # via # -c ingest/../constraints.in # -r ingest/weaviate.in diff --git a/requirements/ingest/wikipedia.txt b/requirements/ingest/wikipedia.txt index 34fe15b1d2..5203aca547 100644 --- a/requirements/ingest/wikipedia.txt +++ b/requirements/ingest/wikipedia.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.9 # by the following command: # # pip-compile --output-file=ingest/wikipedia.txt ingest/wikipedia.in diff --git a/requirements/test.txt b/requirements/test.txt index e9c8e7be9c..74841b5880 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.9 # by the following command: # # pip-compile --output-file=test.txt test.in @@ -8,7 +8,7 @@ appdirs==1.4.4 # via label-studio-tools autoflake==2.2.1 # via -r test.in -black==23.11.0 +black==23.12.1 # via -r test.in certifi==2023.11.17 # via @@ -24,21 +24,21 @@ click==8.1.7 # -c base.txt # -r test.in # black -coverage[toml]==7.3.2 +coverage[toml]==7.4.0 # via # -r test.in # pytest-cov exceptiongroup==1.2.0 # via pytest -flake8==6.1.0 +flake8==7.0.0 # via # -r test.in # flake8-print flake8-print==5.0.0 # via -r test.in -freezegun==1.3.1 +freezegun==1.4.0 # via -r test.in -grpcio==1.59.3 +grpcio==1.60.0 # via -r test.in idna==3.6 # via @@ -51,7 +51,7 @@ label-studio-sdk==0.0.32 # via -r test.in label-studio-tools==0.0.3 # via label-studio-sdk -lxml==4.9.3 +lxml==5.1.0 # via # -c base.txt # label-studio-sdk @@ -60,7 +60,7 @@ mccabe==0.7.0 # via flake8 multidict==6.0.4 # via yarl -mypy==1.7.1 
+mypy==1.8.0 # via -r test.in mypy-extensions==1.0.0 # via @@ -72,7 +72,7 @@ packaging==23.2 # -c base.txt # black # pytest -pathspec==0.11.2 +pathspec==0.12.1 # via black platformdirs==3.10.0 # via @@ -89,11 +89,11 @@ pydantic==1.10.13 # -c constraints.in # -r test.in # label-studio-sdk -pyflakes==3.1.0 +pyflakes==3.2.0 # via # autoflake # flake8 -pytest==7.4.3 +pytest==7.4.4 # via # pytest-cov # pytest-mock @@ -111,7 +111,7 @@ requests==2.31.0 # via # -c base.txt # label-studio-sdk -ruff==0.1.7 +ruff==0.1.11 # via -r test.in six==1.16.0 # via @@ -126,15 +126,15 @@ tomli==2.0.1 # pytest types-click==7.1.8 # via -r test.in -types-markdown==3.5.0.3 +types-markdown==3.5.0.20240106 # via -r test.in types-requests==2.31.0.6 # via -r test.in -types-tabulate==0.9.0.3 +types-tabulate==0.9.0.20240106 # via -r test.in types-urllib3==1.26.25.14 # via types-requests -typing-extensions==4.8.0 +typing-extensions==4.9.0 # via # -c base.txt # black @@ -152,5 +152,5 @@ wrapt==1.16.0 # via # -c base.txt # vcrpy -yarl==1.9.3 +yarl==1.9.4 # via vcrpy diff --git a/scripts/pip-compile.sh b/scripts/pip-compile.sh index 90f1ca8f81..5d66809b28 100755 --- a/scripts/pip-compile.sh +++ b/scripts/pip-compile.sh @@ -1,8 +1,8 @@ #!/usr/bin/env bash -# python version must match lowest supported (3.8) +# python version must match lowest supported (3.9) major=3 -minor=8 +minor=9 if ! 
python -c "import sys; assert sys.version_info.major == $major and sys.version_info.minor == $minor"; then echo "python version not equal to expected $major.$minor: $(python --version)" exit 1 diff --git a/scripts/setup_al2.sh b/scripts/setup_al2.sh index 8555f807f0..6170f71435 100755 --- a/scripts/setup_al2.sh +++ b/scripts/setup_al2.sh @@ -70,7 +70,7 @@ eval "$(pyenv virtualenv-init -)" EOT # install python source "$HOME"/.bashrc - pyenv install 3.8.15 + pyenv install 3.10.4 fi EOF diff --git a/scripts/setup_ubuntu.sh b/scripts/setup_ubuntu.sh index b82080b736..8a2ba71788 100755 --- a/scripts/setup_ubuntu.sh +++ b/scripts/setup_ubuntu.sh @@ -72,7 +72,7 @@ eval "$(pyenv virtualenv-init -)" EOT # install python source "$HOME"/.bashrc - pyenv install 3.8.17 + pyenv install 3.10.4 fi EOF diff --git a/setup.py b/setup.py index 8b03b27de1..0e8e0d1fb6 100644 --- a/setup.py +++ b/setup.py @@ -83,7 +83,7 @@ def load_requirements(file_list: Optional[Union[str, List[str]]] = None) -> List long_description_content_type="text/markdown", keywords="NLP PDF HTML CV XML parsing preprocessing", url="https://github.com/Unstructured-IO/unstructured", - python_requires=">=3.7.0", + python_requires=">=3.9.0,<3.12", classifiers=[ "Development Status :: 4 - Beta", "Intended Audience :: Developers", @@ -92,7 +92,6 @@ def load_requirements(file_list: Optional[Union[str, List[str]]] = None) -> List "License :: OSI Approved :: Apache Software License", "Operating System :: OS Independent", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", diff --git a/test_unstructured_ingest/expected-structured-output/box/nested-1/ideas-page.html.json b/test_unstructured_ingest/expected-structured-output/box/nested-1/ideas-page.html.json index 591c1d74ce..87f3d9c07e 100644 --- 
a/test_unstructured_ingest/expected-structured-output/box/nested-1/ideas-page.html.json +++ b/test_unstructured_ingest/expected-structured-output/box/nested-1/ideas-page.html.json @@ -1,6 +1,6 @@ [ { - "element_id": "e83a347af95db7ba47b5351f411e00c7", + "element_id": "8088fbcca4eb780b8a4b8efe4b018860", "metadata": { "data_source": { "date_created": "2023-07-08T20:46:41-07:00", @@ -17,9 +17,9 @@ "eng" ], "page_number": 1, - "text_as_html": "
January 2023(Someonefed my essays into GPT to make something that could answer
questions based on them, then asked it where good ideas come from. The
answer was ok, but not what I would have said. This is what I would have said.)
The way to get new ideas is to notice anomalies: what seems strange,
or missing, or broken? You can see anomalies in everyday life (much
of standup comedy is based on this), but the best place to look for
them is at the frontiers of knowledge.
Knowledge grows fractally.
From a distance its edges look smooth, but when you learn enough
to get close to one, you'll notice it's full of gaps. These gaps
will seem obvious; it will seem inexplicable that no one has tried
x or wondered about y. In the best case, exploring such gaps yields
whole new fractal buds.
" + "text_as_html": "
January 2023 ( Someone fed my essays into GPT to make something that could answer
questions based on them, then asked it where good ideas come from. The
answer was ok, but not what I would have said. This is what I would have said.) The way to get new ideas is to notice anomalies: what seems strange,
or missing, or broken? You can see anomalies in everyday life (much
of standup comedy is based on this), but the best place to look for
them is at the frontiers of knowledge. Knowledge grows fractally.
From a distance its edges look smooth, but when you learn enough
to get close to one, you'll notice it's full of gaps. These gaps
will seem obvious; it will seem inexplicable that no one has tried
x or wondered about y. In the best case, exploring such gaps yields
whole new fractal buds.
" }, - "text": "January 2023 ( Someone fed my essays into GPT to make something that could answer\nquestions based on them, then asked it where good ideas come from. The\nanswer was ok, but not what I would have said. This is what I would have said.) The way to get new ideas is to notice anomalies: what seems strange,\nor missing, or broken? You can see anomalies in everyday life (much\nof standup comedy is based on this), but the best place to look for\nthem is at the frontiers of knowledge. Knowledge grows fractally.\nFrom a distance its edges look smooth, but when you learn enough\nto get close to one, you'll notice it's full of gaps. These gaps\nwill seem obvious; it will seem inexplicable that no one has tried\nx or wondered about y. In the best case, exploring such gaps yields\nwhole new fractal buds.", + "text": "January 2023 ( Someone fed my essays into GPT to make something that could answer\nquestions based on them, then asked it where good ideas come from. The\nanswer was ok, but not what I would have said. This is what I would have said.) The way to get new ideas is to notice anomalies: what seems strange,\nor missing, or broken? You can see anomalies in everyday life (much\nof standup comedy is based on this), but the best place to look for\nthem is at the frontiers of knowledge. Knowledge grows fractally.\nFrom a distance its edges look smooth, but when you learn enough\nto get close to one, you'll notice it's full of gaps. These gaps\nwill seem obvious; it will seem inexplicable that no one has tried\nx or wondered about y. 
In the best case, exploring such gaps yields\nwhole new fractal buds.", "type": "Table" } ] \ No newline at end of file diff --git a/test_unstructured_ingest/expected-structured-output/box/nested-1/nested-2/ideas-page.html.json b/test_unstructured_ingest/expected-structured-output/box/nested-1/nested-2/ideas-page.html.json index b911e0f745..1476531247 100644 --- a/test_unstructured_ingest/expected-structured-output/box/nested-1/nested-2/ideas-page.html.json +++ b/test_unstructured_ingest/expected-structured-output/box/nested-1/nested-2/ideas-page.html.json @@ -1,6 +1,6 @@ [ { - "element_id": "e83a347af95db7ba47b5351f411e00c7", + "element_id": "8088fbcca4eb780b8a4b8efe4b018860", "metadata": { "data_source": { "date_created": "2023-07-08T20:46:29-07:00", @@ -17,9 +17,9 @@ "eng" ], "page_number": 1, - "text_as_html": "
January 2023(Someonefed my essays into GPT to make something that could answer
questions based on them, then asked it where good ideas come from. The
answer was ok, but not what I would have said. This is what I would have said.)
The way to get new ideas is to notice anomalies: what seems strange,
or missing, or broken? You can see anomalies in everyday life (much
of standup comedy is based on this), but the best place to look for
them is at the frontiers of knowledge.
Knowledge grows fractally.
From a distance its edges look smooth, but when you learn enough
to get close to one, you'll notice it's full of gaps. These gaps
will seem obvious; it will seem inexplicable that no one has tried
x or wondered about y. In the best case, exploring such gaps yields
whole new fractal buds.
" + "text_as_html": "
January 2023 ( Someone fed my essays into GPT to make something that could answer
questions based on them, then asked it where good ideas come from. The
answer was ok, but not what I would have said. This is what I would have said.) The way to get new ideas is to notice anomalies: what seems strange,
or missing, or broken? You can see anomalies in everyday life (much
of standup comedy is based on this), but the best place to look for
them is at the frontiers of knowledge. Knowledge grows fractally.
From a distance its edges look smooth, but when you learn enough
to get close to one, you'll notice it's full of gaps. These gaps
will seem obvious; it will seem inexplicable that no one has tried
x or wondered about y. In the best case, exploring such gaps yields
whole new fractal buds.
" }, - "text": "January 2023 ( Someone fed my essays into GPT to make something that could answer\nquestions based on them, then asked it where good ideas come from. The\nanswer was ok, but not what I would have said. This is what I would have said.) The way to get new ideas is to notice anomalies: what seems strange,\nor missing, or broken? You can see anomalies in everyday life (much\nof standup comedy is based on this), but the best place to look for\nthem is at the frontiers of knowledge. Knowledge grows fractally.\nFrom a distance its edges look smooth, but when you learn enough\nto get close to one, you'll notice it's full of gaps. These gaps\nwill seem obvious; it will seem inexplicable that no one has tried\nx or wondered about y. In the best case, exploring such gaps yields\nwhole new fractal buds.", + "text": "January 2023 ( Someone fed my essays into GPT to make something that could answer\nquestions based on them, then asked it where good ideas come from. The\nanswer was ok, but not what I would have said. This is what I would have said.) The way to get new ideas is to notice anomalies: what seems strange,\nor missing, or broken? You can see anomalies in everyday life (much\nof standup comedy is based on this), but the best place to look for\nthem is at the frontiers of knowledge. Knowledge grows fractally.\nFrom a distance its edges look smooth, but when you learn enough\nto get close to one, you'll notice it's full of gaps. These gaps\nwill seem obvious; it will seem inexplicable that no one has tried\nx or wondered about y. 
In the best case, exploring such gaps yields\nwhole new fractal buds.", "type": "Table" } ] \ No newline at end of file diff --git a/test_unstructured_ingest/expected-structured-output/dropbox/nested-1/ideas-page.html.json b/test_unstructured_ingest/expected-structured-output/dropbox/nested-1/ideas-page.html.json index 17db561d02..430bd34bde 100644 --- a/test_unstructured_ingest/expected-structured-output/dropbox/nested-1/ideas-page.html.json +++ b/test_unstructured_ingest/expected-structured-output/dropbox/nested-1/ideas-page.html.json @@ -1,6 +1,6 @@ [ { - "element_id": "e83a347af95db7ba47b5351f411e00c7", + "element_id": "8088fbcca4eb780b8a4b8efe4b018860", "metadata": { "data_source": { "record_locator": { @@ -15,9 +15,9 @@ "eng" ], "page_number": 1, - "text_as_html": "
January 2023(Someonefed my essays into GPT to make something that could answer
questions based on them, then asked it where good ideas come from. The
answer was ok, but not what I would have said. This is what I would have said.)
The way to get new ideas is to notice anomalies: what seems strange,
or missing, or broken? You can see anomalies in everyday life (much
of standup comedy is based on this), but the best place to look for
them is at the frontiers of knowledge.
Knowledge grows fractally.
From a distance its edges look smooth, but when you learn enough
to get close to one, you'll notice it's full of gaps. These gaps
will seem obvious; it will seem inexplicable that no one has tried
x or wondered about y. In the best case, exploring such gaps yields
whole new fractal buds.
" + "text_as_html": "
January 2023 ( Someone fed my essays into GPT to make something that could answer
questions based on them, then asked it where good ideas come from. The
answer was ok, but not what I would have said. This is what I would have said.) The way to get new ideas is to notice anomalies: what seems strange,
or missing, or broken? You can see anomalies in everyday life (much
of standup comedy is based on this), but the best place to look for
them is at the frontiers of knowledge. Knowledge grows fractally.
From a distance its edges look smooth, but when you learn enough
to get close to one, you'll notice it's full of gaps. These gaps
will seem obvious; it will seem inexplicable that no one has tried
x or wondered about y. In the best case, exploring such gaps yields
whole new fractal buds.
" }, - "text": "January 2023 ( Someone fed my essays into GPT to make something that could answer\nquestions based on them, then asked it where good ideas come from. The\nanswer was ok, but not what I would have said. This is what I would have said.) The way to get new ideas is to notice anomalies: what seems strange,\nor missing, or broken? You can see anomalies in everyday life (much\nof standup comedy is based on this), but the best place to look for\nthem is at the frontiers of knowledge. Knowledge grows fractally.\nFrom a distance its edges look smooth, but when you learn enough\nto get close to one, you'll notice it's full of gaps. These gaps\nwill seem obvious; it will seem inexplicable that no one has tried\nx or wondered about y. In the best case, exploring such gaps yields\nwhole new fractal buds.", + "text": "January 2023 ( Someone fed my essays into GPT to make something that could answer\nquestions based on them, then asked it where good ideas come from. The\nanswer was ok, but not what I would have said. This is what I would have said.) The way to get new ideas is to notice anomalies: what seems strange,\nor missing, or broken? You can see anomalies in everyday life (much\nof standup comedy is based on this), but the best place to look for\nthem is at the frontiers of knowledge. Knowledge grows fractally.\nFrom a distance its edges look smooth, but when you learn enough\nto get close to one, you'll notice it's full of gaps. These gaps\nwill seem obvious; it will seem inexplicable that no one has tried\nx or wondered about y. 
In the best case, exploring such gaps yields\nwhole new fractal buds.", "type": "Table" } ] \ No newline at end of file diff --git a/test_unstructured_ingest/expected-structured-output/dropbox/nested-2/ideas-page.html.json b/test_unstructured_ingest/expected-structured-output/dropbox/nested-2/ideas-page.html.json index 58bea767a8..39f69b0038 100644 --- a/test_unstructured_ingest/expected-structured-output/dropbox/nested-2/ideas-page.html.json +++ b/test_unstructured_ingest/expected-structured-output/dropbox/nested-2/ideas-page.html.json @@ -1,6 +1,6 @@ [ { - "element_id": "e83a347af95db7ba47b5351f411e00c7", + "element_id": "8088fbcca4eb780b8a4b8efe4b018860", "metadata": { "data_source": { "record_locator": { @@ -15,9 +15,9 @@ "eng" ], "page_number": 1, - "text_as_html": "
January 2023(Someonefed my essays into GPT to make something that could answer
questions based on them, then asked it where good ideas come from. The
answer was ok, but not what I would have said. This is what I would have said.)
The way to get new ideas is to notice anomalies: what seems strange,
or missing, or broken? You can see anomalies in everyday life (much
of standup comedy is based on this), but the best place to look for
them is at the frontiers of knowledge.
Knowledge grows fractally.
From a distance its edges look smooth, but when you learn enough
to get close to one, you'll notice it's full of gaps. These gaps
will seem obvious; it will seem inexplicable that no one has tried
x or wondered about y. In the best case, exploring such gaps yields
whole new fractal buds.
" + "text_as_html": "
January 2023 ( Someone fed my essays into GPT to make something that could answer
questions based on them, then asked it where good ideas come from. The
answer was ok, but not what I would have said. This is what I would have said.) The way to get new ideas is to notice anomalies: what seems strange,
or missing, or broken? You can see anomalies in everyday life (much
of standup comedy is based on this), but the best place to look for
them is at the frontiers of knowledge. Knowledge grows fractally.
From a distance its edges look smooth, but when you learn enough
to get close to one, you'll notice it's full of gaps. These gaps
will seem obvious; it will seem inexplicable that no one has tried
x or wondered about y. In the best case, exploring such gaps yields
whole new fractal buds.
" }, - "text": "January 2023 ( Someone fed my essays into GPT to make something that could answer\nquestions based on them, then asked it where good ideas come from. The\nanswer was ok, but not what I would have said. This is what I would have said.) The way to get new ideas is to notice anomalies: what seems strange,\nor missing, or broken? You can see anomalies in everyday life (much\nof standup comedy is based on this), but the best place to look for\nthem is at the frontiers of knowledge. Knowledge grows fractally.\nFrom a distance its edges look smooth, but when you learn enough\nto get close to one, you'll notice it's full of gaps. These gaps\nwill seem obvious; it will seem inexplicable that no one has tried\nx or wondered about y. In the best case, exploring such gaps yields\nwhole new fractal buds.", + "text": "January 2023 ( Someone fed my essays into GPT to make something that could answer\nquestions based on them, then asked it where good ideas come from. The\nanswer was ok, but not what I would have said. This is what I would have said.) The way to get new ideas is to notice anomalies: what seems strange,\nor missing, or broken? You can see anomalies in everyday life (much\nof standup comedy is based on this), but the best place to look for\nthem is at the frontiers of knowledge. Knowledge grows fractally.\nFrom a distance its edges look smooth, but when you learn enough\nto get close to one, you'll notice it's full of gaps. These gaps\nwill seem obvious; it will seem inexplicable that no one has tried\nx or wondered about y. 
In the best case, exploring such gaps yields\nwhole new fractal buds.", "type": "Table" } ] \ No newline at end of file diff --git a/test_unstructured_ingest/expected-structured-output/embed/book-war-and-peace-1p.txt.json b/test_unstructured_ingest/expected-structured-output/embed/book-war-and-peace-1p.txt.json index 4bc48a5095..501ee455e1 100644 --- a/test_unstructured_ingest/expected-structured-output/embed/book-war-and-peace-1p.txt.json +++ b/test_unstructured_ingest/expected-structured-output/embed/book-war-and-peace-1p.txt.json @@ -2,6 +2,7 @@ { "type": "Title", "element_id": "d0e5cb42d9924bed3d7ed4f8db06ecaf", + "text": "CHAPTER I", "metadata": { "data_source": { "url": "example-docs/book-war-and-peace-1p.txt", @@ -16,7 +17,6 @@ "eng" ] }, - "text": "CHAPTER I", "embeddings": [ -0.03905360773205757, 0.06777488440275192, @@ -407,6 +407,7 @@ { "type": "NarrativeText", "element_id": "8e7b93275c6f8e58cc1b28493efda1fc", + "text": "\"Well, Prince, so Genoa and Lucca are now just family estates of the Buonapartes. But I warn you, if you don't tell me that this means war, if you still try to defend the infamies and horrors perpetrated by that Antichrist--I really believe he is Antichrist--I will have nothing more to do with you and you are no longer my friend, no longer my 'faithful slave,' as you call yourself! But how do you do? I see I have frightened you--sit down and tell me all the news.\"", "metadata": { "data_source": { "url": "example-docs/book-war-and-peace-1p.txt", @@ -421,7 +422,6 @@ "eng" ] }, - "text": "\"Well, Prince, so Genoa and Lucca are now just family estates of the Buonapartes. But I warn you, if you don't tell me that this means war, if you still try to defend the infamies and horrors perpetrated by that Antichrist--I really believe he is Antichrist--I will have nothing more to do with you and you are no longer my friend, no longer my 'faithful slave,' as you call yourself! But how do you do? 
I see I have frightened you--sit down and tell me all the news.\"", "embeddings": [ -0.05874710530042648, 0.10656478255987167, @@ -812,6 +812,7 @@ { "type": "NarrativeText", "element_id": "69843bc15c04968cc7528bda021d7fd2", + "text": "It was in July, 1805, and the speaker was the well-known Anna Pavlovna Scherer, maid of honor and favorite of the Empress Marya Fedorovna. With these words she greeted Prince Vasili Kuragin, a man of high rank and importance, who was the first to arrive at her reception. Anna Pavlovna had had a cough for some days. She was, as she said, suffering from la grippe; grippe being then a new word in St. Petersburg, used only by the elite.", "metadata": { "data_source": { "url": "example-docs/book-war-and-peace-1p.txt", @@ -826,7 +827,6 @@ "eng" ] }, - "text": "It was in July, 1805, and the speaker was the well-known Anna Pavlovna Scherer, maid of honor and favorite of the Empress Marya Fedorovna. With these words she greeted Prince Vasili Kuragin, a man of high rank and importance, who was the first to arrive at her reception. Anna Pavlovna had had a cough for some days. She was, as she said, suffering from la grippe; grippe being then a new word in St. 
Petersburg, used only by the elite.", "embeddings": [ -0.05423668399453163, 0.05444527044892311, @@ -1217,6 +1217,7 @@ { "type": "NarrativeText", "element_id": "40a11496528c0849b69a9a3691d895ac", + "text": "All her invitations without exception, written in French, and delivered by a scarlet-liveried footman that morning, ran as follows:", "metadata": { "data_source": { "url": "example-docs/book-war-and-peace-1p.txt", @@ -1231,7 +1232,6 @@ "eng" ] }, - "text": "All her invitations without exception, written in French, and delivered by a scarlet-liveried footman that morning, ran as follows:", "embeddings": [ 0.002359517617151141, 0.047255586832761765, @@ -1622,6 +1622,7 @@ { "type": "NarrativeText", "element_id": "535b8c5ae979892fa658ef113017f65f", + "text": "\"If you have nothing better to do, Count (or Prince), and if the prospect of spending an evening with a poor invalid is not too terrible, I shall be very charmed to see you tonight between 7 and 10--Annette Scherer.\"", "metadata": { "data_source": { "url": "example-docs/book-war-and-peace-1p.txt", @@ -1636,7 +1637,6 @@ "eng" ] }, - "text": "\"If you have nothing better to do, Count (or Prince), and if the prospect of spending an evening with a poor invalid is not too terrible, I shall be very charmed to see you tonight between 7 and 10--Annette Scherer.\"", "embeddings": [ -0.04862005263566971, 0.036879975348711014, @@ -2027,6 +2027,7 @@ { "type": "NarrativeText", "element_id": "709bd60f9a74e1bae1dce2cb852fd285", + "text": "\"Heavens! what a virulent attack!\" replied the prince, not in the least disconcerted by this reception. He had just entered, wearing an embroidered court uniform, knee breeches, and shoes, and had stars on his breast and a serene expression on his flat face. He spoke in that refined French in which our grandfathers not only spoke but thought, and with the gentle, patronizing intonation natural to a man of importance who had grown old in society and at court. 
He went up to Anna Pavlovna, kissed her hand, presenting to her his bald, scented, and shining head, and complacently seated himself on the sofa.", "metadata": { "data_source": { "url": "example-docs/book-war-and-peace-1p.txt", @@ -2041,7 +2042,6 @@ "eng" ] }, - "text": "\"Heavens! what a virulent attack!\" replied the prince, not in the least disconcerted by this reception. He had just entered, wearing an embroidered court uniform, knee breeches, and shoes, and had stars on his breast and a serene expression on his flat face. He spoke in that refined French in which our grandfathers not only spoke but thought, and with the gentle, patronizing intonation natural to a man of importance who had grown old in society and at court. He went up to Anna Pavlovna, kissed her hand, presenting to her his bald, scented, and shining head, and complacently seated himself on the sofa.", "embeddings": [ -0.01821090281009674, 0.14189432561397552, @@ -2432,6 +2432,7 @@ { "type": "NarrativeText", "element_id": "18f1d1fec4849ca9651f6a1d860dda63", + "text": "\"First of all, dear friend, tell me how you are. Set your friend's mind at rest,\" said he without altering his tone, beneath the politeness and affected sympathy of which indifference and even irony could be discerned.", "metadata": { "data_source": { "url": "example-docs/book-war-and-peace-1p.txt", @@ -2446,7 +2447,6 @@ "eng" ] }, - "text": "\"First of all, dear friend, tell me how you are. Set your friend's mind at rest,\" said he without altering his tone, beneath the politeness and affected sympathy of which indifference and even irony could be discerned.", "embeddings": [ 0.016098741441965103, 0.12206722050905228, @@ -2837,6 +2837,7 @@ { "type": "NarrativeText", "element_id": "640111bd2f3cf71aadf0dbbfcd216bcc", + "text": "\"Can one be well while suffering morally? Can one be calm in times like these if one has any feeling?\" said Anna Pavlovna. 
\"You are staying the whole evening, I hope?\"", "metadata": { "data_source": { "url": "example-docs/book-war-and-peace-1p.txt", @@ -2851,7 +2852,6 @@ "eng" ] }, - "text": "\"Can one be well while suffering morally? Can one be calm in times like these if one has any feeling?\" said Anna Pavlovna. \"You are staying the whole evening, I hope?\"", "embeddings": [ -0.016104821115732193, 0.05125334858894348, @@ -3242,6 +3242,7 @@ { "type": "NarrativeText", "element_id": "203e55d1aedd6876a30104dcd8c51425", + "text": "\"And the fete at the English ambassador's? Today is Wednesday. I must put in an appearance there,\" said the prince. \"My daughter is coming for me to take me there.\"", "metadata": { "data_source": { "url": "example-docs/book-war-and-peace-1p.txt", @@ -3256,7 +3257,6 @@ "eng" ] }, - "text": "\"And the fete at the English ambassador's? Today is Wednesday. I must put in an appearance there,\" said the prince. \"My daughter is coming for me to take me there.\"", "embeddings": [ 0.011866229586303234, 0.08889525383710861, @@ -3647,6 +3647,7 @@ { "type": "NarrativeText", "element_id": "82b8870c8d80978043c1db4df3b7e207", + "text": "\"I thought today's fete had been canceled. I confess all these festivities and fireworks are becoming wearisome.\"", "metadata": { "data_source": { "url": "example-docs/book-war-and-peace-1p.txt", @@ -3661,7 +3662,6 @@ "eng" ] }, - "text": "\"I thought today's fete had been canceled. 
I confess all these festivities and fireworks are becoming wearisome.\"", "embeddings": [ -0.010653985664248466, 0.11256986111402512, @@ -4052,6 +4052,7 @@ { "type": "NarrativeText", "element_id": "e43850c5b521a136a2bf8df83224040f", + "text": "\"If they had known that you wished it, the entertainment would have been put off,\" said the prince, who, like a wound-up clock, by force of habit said things he did not even wish to be believed.", "metadata": { "data_source": { "url": "example-docs/book-war-and-peace-1p.txt", @@ -4066,7 +4067,6 @@ "eng" ] }, - "text": "\"If they had known that you wished it, the entertainment would have been put off,\" said the prince, who, like a wound-up clock, by force of habit said things he did not even wish to be believed.", "embeddings": [ -0.0202062726020813, 0.12951603531837463, @@ -4457,6 +4457,7 @@ { "type": "NarrativeText", "element_id": "8ff6545f658759f8b87daf51d36b3bc3", + "text": "\"Don't tease! Well, and what has been decided about Novosiltsev's dispatch? You know everything.\"", "metadata": { "data_source": { "url": "example-docs/book-war-and-peace-1p.txt", @@ -4471,7 +4472,6 @@ "eng" ] }, - "text": "\"Don't tease! Well, and what has been decided about Novosiltsev's dispatch? You know everything.\"", "embeddings": [ -0.033423759043216705, 0.08513973653316498, @@ -4862,6 +4862,7 @@ { "type": "NarrativeText", "element_id": "5dc094cc3e4812145a3b72d09af9a478", + "text": "\"What can one say about it?\" replied the prince in a cold, listless tone. \"What has been decided? They have decided that Buonaparte has burnt his boats, and I believe that we are ready to burn ours.\"", "metadata": { "data_source": { "url": "example-docs/book-war-and-peace-1p.txt", @@ -4876,7 +4877,6 @@ "eng" ] }, - "text": "\"What can one say about it?\" replied the prince in a cold, listless tone. \"What has been decided? 
They have decided that Buonaparte has burnt his boats, and I believe that we are ready to burn ours.\"", "embeddings": [ -0.030620412901043892, 0.11658516526222229, diff --git a/test_unstructured_ingest/expected-structured-output/notion/122b2c22-996b-435b-9de2-ee0e9d2b04bc.json b/test_unstructured_ingest/expected-structured-output/notion/122b2c22-996b-435b-9de2-ee0e9d2b04bc.json index b44d3decce..84e752a0d2 100644 --- a/test_unstructured_ingest/expected-structured-output/notion/122b2c22-996b-435b-9de2-ee0e9d2b04bc.json +++ b/test_unstructured_ingest/expected-structured-output/notion/122b2c22-996b-435b-9de2-ee0e9d2b04bc.json @@ -1,6 +1,6 @@ [ { - "element_id": "0c2df749e3452be7112971d5847ab448", + "element_id": "749cdd39fe1a2398147fa0898b3363af", "metadata": { "data_source": { "date_created": "2023-08-04T18:31:00.000Z", @@ -11,9 +11,9 @@ "eng" ], "page_number": 1, - "text_as_html": "


Created time


Last edited time


Owner


Page


Tags


Verification




2023-08-04T18:31:00.000Z




2023-08-04T18:31:00.000Z






Roman Isecke






New Page









unverified







2023-08-04T18:31:00.000Z




2023-08-04T18:31:00.000Z






Roman Isecke






Morale Events





Policies







unverified







2023-08-04T18:31:00.000Z




2023-08-04T19:02:00.000Z






Roman Isecke






New Page With Verification









expired


Roman Isecke


2023-08-04T04:00:00.000Z - 2023-08-11T04:00:00.000Z







2023-08-04T18:31:00.000Z




2023-08-04T18:31:00.000Z






Roman Isecke






Vacation Policy





Policies







unverified







2023-08-04T18:31:00.000Z




2023-08-04T18:31:00.000Z






Roman Isecke






Mission, Vision, Values





Vision


Company Updates







unverified







2023-08-04T18:31:00.000Z




2023-08-04T18:31:00.000Z






Roman Isecke






Recent Press





Company Updates







unverified







2023-08-04T18:31:00.000Z




2023-08-04T18:31:00.000Z






Roman Isecke






Getting Started









unverified







2023-08-04T18:31:00.000Z




2023-08-17T18:48:00.000Z






Roman Isecke






Page with every block





Company Updates


Policies







expired


Roman Isecke


2023-08-04T04:00:00.000Z - 2023-11-02T04:00:00.000Z







2023-08-04T18:31:00.000Z




2023-08-04T18:31:00.000Z






Roman Isecke






Corporate Travel





Policies







unverified







2023-08-04T18:31:00.000Z




2023-08-04T18:31:00.000Z






Roman Isecke






Benefits Policies





Policies







unverified




" + "text_as_html": "
Created timeLast edited timeOwnerPageTagsVerification
2023-08-04T18:31:00.000Z2023-08-04T18:31:00.000ZRoman IseckeNew Pageunverified
2023-08-04T18:31:00.000Z2023-08-04T18:31:00.000ZRoman IseckeMorale EventsPoliciesunverified
2023-08-04T18:31:00.000Z2023-08-04T19:02:00.000ZRoman IseckeNew Page With Verificationexpired Roman Isecke 2023-08-04T04:00:00.000Z - 2023-08-11T04:00:00.000Z
2023-08-04T18:31:00.000Z2023-08-04T18:31:00.000ZRoman IseckeVacation PolicyPoliciesunverified
2023-08-04T18:31:00.000Z2023-08-04T18:31:00.000ZRoman IseckeMission, Vision, ValuesVision Company Updatesunverified
2023-08-04T18:31:00.000Z2023-08-04T18:31:00.000ZRoman IseckeRecent PressCompany Updatesunverified
2023-08-04T18:31:00.000Z2023-08-04T18:31:00.000ZRoman IseckeGetting Startedunverified
2023-08-04T18:31:00.000Z2023-08-17T18:48:00.000ZRoman IseckePage with every blockCompany Updates Policiesexpired Roman Isecke 2023-08-04T04:00:00.000Z - 2023-11-02T04:00:00.000Z
2023-08-04T18:31:00.000Z2023-08-04T18:31:00.000ZRoman IseckeCorporate TravelPoliciesunverified
2023-08-04T18:31:00.000Z2023-08-04T18:31:00.000ZRoman IseckeBenefits PoliciesPoliciesunverified
" }, - "text": "Created time\n \n \n Last edited time\n \n \n Owner\n \n \n Page\n \n \n Tags\n \n \n Verification\n \n \n \n \n 2023-08-04T18:31:00.000Z\n \n \n \n \n 2023-08-04T18:31:00.000Z\n \n \n \n \n \n \n Roman Isecke\n \n \n \n \n \n \n New Page\n \n \n \n \n \n \n \n \n \n unverified\n \n \n \n \n \n \n \n 2023-08-04T18:31:00.000Z\n \n \n \n \n 2023-08-04T18:31:00.000Z\n \n \n \n \n \n \n Roman Isecke\n \n \n \n \n \n \n Morale Events\n \n \n \n \n \n Policies\n \n \n \n \n \n \n \n unverified\n \n \n \n \n \n \n \n 2023-08-04T18:31:00.000Z\n \n \n \n \n 2023-08-04T19:02:00.000Z\n \n \n \n \n \n \n Roman Isecke\n \n \n \n \n \n \n New Page With Verification\n \n \n \n \n \n \n \n \n \n expired\n \n \n Roman Isecke\n \n \n 2023-08-04T04:00:00.000Z - 2023-08-11T04:00:00.000Z\n \n \n \n \n \n \n \n 2023-08-04T18:31:00.000Z\n \n \n \n \n 2023-08-04T18:31:00.000Z\n \n \n \n \n \n \n Roman Isecke\n \n \n \n \n \n \n Vacation Policy\n \n \n \n \n \n Policies\n \n \n \n \n \n \n \n unverified\n \n \n \n \n \n \n \n 2023-08-04T18:31:00.000Z\n \n \n \n \n 2023-08-04T18:31:00.000Z\n \n \n \n \n \n \n Roman Isecke\n \n \n \n \n \n \n Mission, Vision, Values\n \n \n \n \n \n Vision\n \n \n Company Updates\n \n \n \n \n \n \n \n unverified\n \n \n \n \n \n \n \n 2023-08-04T18:31:00.000Z\n \n \n \n \n 2023-08-04T18:31:00.000Z\n \n \n \n \n \n \n Roman Isecke\n \n \n \n \n \n \n Recent Press\n \n \n \n \n \n Company Updates\n \n \n \n \n \n \n \n unverified\n \n \n \n \n \n \n \n 2023-08-04T18:31:00.000Z\n \n \n \n \n 2023-08-04T18:31:00.000Z\n \n \n \n \n \n \n Roman Isecke\n \n \n \n \n \n \n Getting Started\n \n \n \n \n \n \n \n \n \n unverified\n \n \n \n \n \n \n \n 2023-08-04T18:31:00.000Z\n \n \n \n \n 2023-08-17T18:48:00.000Z\n \n \n \n \n \n \n Roman Isecke\n \n \n \n \n \n \n Page with every block \n \n \n \n \n \n Company Updates\n \n \n Policies\n \n \n \n \n \n \n \n expired\n \n \n Roman Isecke\n \n \n 2023-08-04T04:00:00.000Z - 2023-11-02T04:00:00.000Z\n 
\n \n \n \n \n \n \n 2023-08-04T18:31:00.000Z\n \n \n \n \n 2023-08-04T18:31:00.000Z\n \n \n \n \n \n \n Roman Isecke\n \n \n \n \n \n \n Corporate Travel\n \n \n \n \n \n Policies\n \n \n \n \n \n \n \n unverified\n \n \n \n \n \n \n \n 2023-08-04T18:31:00.000Z\n \n \n \n \n 2023-08-04T18:31:00.000Z\n \n \n \n \n \n \n Roman Isecke\n \n \n \n \n \n \n Benefits Policies\n \n \n \n \n \n Policies\n \n \n \n \n \n \n \n unverified", + "text": "Created time Last edited time Owner Page Tags Verification 2023-08-04T18:31:00.000Z 2023-08-04T18:31:00.000Z Roman Isecke New Page unverified 2023-08-04T18:31:00.000Z 2023-08-04T18:31:00.000Z Roman Isecke Morale Events Policies unverified 2023-08-04T18:31:00.000Z 2023-08-04T19:02:00.000Z Roman Isecke New Page With Verification expired Roman Isecke 2023-08-04T04:00:00.000Z - 2023-08-11T04:00:00.000Z 2023-08-04T18:31:00.000Z 2023-08-04T18:31:00.000Z Roman Isecke Vacation Policy Policies unverified 2023-08-04T18:31:00.000Z 2023-08-04T18:31:00.000Z Roman Isecke Mission, Vision, Values Vision Company Updates unverified 2023-08-04T18:31:00.000Z 2023-08-04T18:31:00.000Z Roman Isecke Recent Press Company Updates unverified 2023-08-04T18:31:00.000Z 2023-08-04T18:31:00.000Z Roman Isecke Getting Started unverified 2023-08-04T18:31:00.000Z 2023-08-17T18:48:00.000Z Roman Isecke Page with every block Company Updates Policies expired Roman Isecke 2023-08-04T04:00:00.000Z - 2023-11-02T04:00:00.000Z 2023-08-04T18:31:00.000Z 2023-08-04T18:31:00.000Z Roman Isecke Corporate Travel Policies unverified 2023-08-04T18:31:00.000Z 2023-08-04T18:31:00.000Z Roman Isecke Benefits Policies Policies unverified", "type": "Table" } ] \ No newline at end of file diff --git a/test_unstructured_ingest/expected-structured-output/notion/c47a4566-4c7a-488b-ac2a-1292ee507fcb.json b/test_unstructured_ingest/expected-structured-output/notion/c47a4566-4c7a-488b-ac2a-1292ee507fcb.json index d23bf1fcab..33e60e2b71 100644 --- 
a/test_unstructured_ingest/expected-structured-output/notion/c47a4566-4c7a-488b-ac2a-1292ee507fcb.json +++ b/test_unstructured_ingest/expected-structured-output/notion/c47a4566-4c7a-488b-ac2a-1292ee507fcb.json @@ -114,7 +114,7 @@ "type": "UncategorizedText" }, { - "element_id": "8298c3f1d0016deb9cbf44832c33480c", + "element_id": "2209179731e74e3b2e89ecdf777cae7c", "metadata": { "data_source": { "date_created": "2023-08-04T18:31:00.000Z", @@ -125,9 +125,9 @@ "eng" ], "page_number": 1, - "text_as_html": "


column 1


column 2


pages



c1r1

content



c2r1 table
2023-08-08T09:00:00.000-04:00
cell



Page with every block





c1r2 more

content



c2r2 table

cell




Untitled





this is some green text


this is

an


equation




Untitled





text1



text2



Multiline cell


Another cell



Untitled



" + "text_as_html": "
column 1column 2pages
c1r1 contentc2r1 table
2023-08-08T09:00:00.000-04:00
cell
Page with every block
c1r2 more contentc2r2 table cellUntitled
this is some green textthis is an equationUntitled
text1 text2 Multiline cellAnother cellUntitled
" }, - "text": "column 1\n \n \n column 2\n \n \n pages\n \n \n \n c1r1 \n \n content \n \n \n \n c2r1 table \n 2023-08-08T09:00:00.000-04:00\n cell\n \n \n \n Page with every block \n \n \n \n \n \n c1r2 more \n \n content\n \n \n \n c2r2 table \n \n cell\n \n \n \n \n Untitled\n \n \n \n \n \n this is some green text\n \n \n this is \n \n an \n \n \n equation\n \n \n \n \n Untitled\n \n \n \n \n \n text1\n\n\n \n text2\n \n \n\nMultiline cell\n \n \n Another cell \n \n \n \n Untitled", + "text": "column 1 column 2 pages c1r1 content c2r1 table \n 2023-08-08T09:00:00.000-04:00\n cell Page with every block c1r2 more content c2r2 table cell Untitled this is some green text this is an equation Untitled text1 text2 Multiline cell Another cell Untitled", "type": "Table" }, { diff --git a/test_unstructured_ingest/expected-structured-output/notion/d3d87fc6-61cc-4bb5-89ed-e9dff0df1526.json b/test_unstructured_ingest/expected-structured-output/notion/d3d87fc6-61cc-4bb5-89ed-e9dff0df1526.json deleted file mode 100644 index a496fb499c..0000000000 --- a/test_unstructured_ingest/expected-structured-output/notion/d3d87fc6-61cc-4bb5-89ed-e9dff0df1526.json +++ /dev/null @@ -1,19 +0,0 @@ -[ - { - "element_id": "2026c17673ac794e40e78d1c8e28df5c", - "metadata": { - "data_source": { - "date_created": "2023-08-02T20:36:00.000Z", - "date_modified": "2023-08-17T18:49:00.000Z" - }, - "filetype": "text/html", - "languages": [ - "eng" - ], - "page_number": 1, - "text_as_html": "


Completed tasks


Dates


My Checkbox


My Created By


My Created Time


My Date


My Email


My Person


My Text


My formula


My multiselect


My number


My phone num


My select


Sprint ID


Sprint name


Sprint status


Tasks


Total tasks


URL




0




2023-08-14 - 2023-08-27









Roman Isecke




2023-08-02T20:36:00.000Z




2023-08-31




email@custom.domaine






DevOps-Bot









False





Option 1





12










SPRI1-2







Next




notion://sprints/sprint_task_relation




1








0




2023-08-28 - 2023-09-10









Roman Isecke




2023-08-02T20:36:00.000Z




2023-08-29T00:00:00.000-04:00 - 2023-08-31T00:00:00.000-04:00




text








More



text



with



link






False










45666645345465454




option 1




SPRI1-3




Sprint 3




Future




notion://sprints/sprint_task_relation




1








0.25




2023-07-31 - 2023-08-13









Roman Isecke




2023-08-02T20:36:00.000Z




2023-08-07




roman@unstructured.io






Roman Isecke




Jason Scheirer







This is some



formatted



text





True





Option 2


Option 1





32




1234




option 2




SPRI1-1




Sprint 1




Current




notion://sprints/sprint_task_relation




4




www.google.com


" - }, - "text": "Completed tasks\n \n \n Dates\n \n \n My Checkbox\n \n \n My Created By\n \n \n My Created Time\n \n \n My Date\n \n \n My Email\n \n \n My Person\n \n \n My Text\n \n \n My formula\n \n \n My multiselect\n \n \n My number\n \n \n My phone num\n \n \n My select\n \n \n Sprint ID\n \n \n Sprint name\n \n \n Sprint status\n \n \n Tasks\n \n \n Total tasks\n \n \n URL\n \n \n \n \n 0\n \n \n \n \n 2023-08-14 - 2023-08-27\n \n \n \n \n \n \n \n \n \n Roman Isecke\n \n \n \n \n 2023-08-02T20:36:00.000Z\n \n \n \n \n 2023-08-31\n \n \n \n \n email@custom.domaine\n \n \n \n \n \n \n DevOps-Bot\n \n \n \n \n \n \n \n \n \n False\n \n \n \n \n \n Option 1\n \n \n \n \n \n 12\n \n \n \n \n \n \n \n \n \n \n SPRI1-2\n \n \n \n \n \n \n \n Next\n \n \n \n \n notion://sprints/sprint_task_relation\n \n \n \n \n 1\n \n \n \n \n \n \n \n \n 0\n \n \n \n \n 2023-08-28 - 2023-09-10\n \n \n \n \n \n \n \n \n \n Roman Isecke\n \n \n \n \n 2023-08-02T20:36:00.000Z\n \n \n \n \n 2023-08-29T00:00:00.000-04:00 - 2023-08-31T00:00:00.000-04:00\n \n \n \n \n text\n \n \n \n \n \n \n \n \n More \n \n \n \n text\n \n \n \n with \n \n \n \n link\n \n \n \n \n \n \n False\n \n \n \n \n \n \n \n \n \n \n 45666645345465454\n \n \n \n \n option 1\n \n \n \n \n SPRI1-3\n \n \n \n \n Sprint 3\n \n \n \n \n Future\n \n \n \n \n notion://sprints/sprint_task_relation\n \n \n \n \n 1\n \n \n \n \n \n \n \n \n 0.25\n \n \n \n \n 2023-07-31 - 2023-08-13\n \n \n \n \n \n \n \n \n \n Roman Isecke\n \n \n \n \n 2023-08-02T20:36:00.000Z\n \n \n \n \n 2023-08-07\n \n \n \n \n roman@unstructured.io\n \n \n \n \n \n \n Roman Isecke\n \n \n \n \n Jason Scheirer\n \n \n \n \n \n \n \n This is some \n \n \n \n formatted\n \n \n \n text\n \n \n \n \n \n True\n \n \n \n \n \n Option 2\n \n \n Option 1\n \n \n \n \n \n 32\n \n \n \n \n 1234\n \n \n \n \n option 2\n \n \n \n \n SPRI1-1\n \n \n \n \n Sprint 1\n \n \n \n \n Current\n \n \n \n \n notion://sprints/sprint_task_relation\n \n \n \n \n 4\n \n 
\n \n \n www.google.com", - "type": "Table" - } -] \ No newline at end of file diff --git a/test_unstructured_ingest/expected-structured-output/s3/2023-Jan-economic-outlook.pdf.json b/test_unstructured_ingest/expected-structured-output/s3/2023-Jan-economic-outlook.pdf.json index 534bf35ca5..15edcd803b 100644 --- a/test_unstructured_ingest/expected-structured-output/s3/2023-Jan-economic-outlook.pdf.json +++ b/test_unstructured_ingest/expected-structured-output/s3/2023-Jan-economic-outlook.pdf.json @@ -3,7 +3,7 @@ "element_id": "0a5f21a42e259b1ae44adc8758f2db19", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -24,7 +24,7 @@ "element_id": "c2218d09ef001e697de0e0676777a643", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -45,7 +45,7 @@ "element_id": "1cea24ad0aab30d447fc2b47dcd4f259", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -66,7 +66,7 @@ "element_id": "abcb617ca920c453f3e353e1e2d6885b", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -87,7 +87,7 @@ "element_id": "e7ea93453698b4f8bc32fd7cb860617e", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", 
"remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -108,7 +108,7 @@ "element_id": "dd494a076f4a875e3ff8591dd84e3bcb", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -129,7 +129,7 @@ "element_id": "69c0abb4a05d8b3650ac06c6c07c3b88", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -150,7 +150,7 @@ "element_id": "40f2b406a410dadedbf14e1310e6fd94", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -171,7 +171,7 @@ "element_id": "0953470500eb215048fd49263b8829a4", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -192,7 +192,7 @@ "element_id": "fa4f01ba2113b0b7859f01f31ef6c5b1", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -213,7 +213,7 @@ "element_id": "895af44c7b71b95db8e28f86ef2224c8", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ 
-234,7 +234,7 @@ "element_id": "a4bc8a1fe50aa3ff61ba53d96830f9c1", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -255,7 +255,7 @@ "element_id": "c99869e52743869e29fd645e9e0df6fb", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -276,7 +276,7 @@ "element_id": "95af4f3feb2d03b2310ce31abc0c435d", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -297,7 +297,7 @@ "element_id": "78f4f092ce77888950ef2172b8d2a92a", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -318,7 +318,7 @@ "element_id": "c140ad5c30b6075c1a553eddacd8eca5", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -339,7 +339,7 @@ "element_id": "323d79e74460eda1fb0f8d55a2e0ff42", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -360,7 +360,7 @@ "element_id": "49dca65f362fee401292ed7ada96f962", "metadata": { 
"data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -381,7 +381,7 @@ "element_id": "007b2203e9e86a49c3108e9ffd16fbbc", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -402,7 +402,7 @@ "element_id": "808caaef5b114d874a25b7fec21b5516", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -423,7 +423,7 @@ "element_id": "c2c7be4534a60790d1d18451c91dc138", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -444,7 +444,7 @@ "element_id": "28a5aa3897d66de6c31caba99a4c337e", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -465,7 +465,7 @@ "element_id": "2c469f8fa0f3c1c771330dde1be1b28c", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -486,7 +486,7 @@ "element_id": "3a162049bc9ee88b56d4d4bf5897368f", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": 
"2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -507,7 +507,7 @@ "element_id": "cc874418b59b7ecb37a2c938783fb5ce", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -528,7 +528,7 @@ "element_id": "cc874418b59b7ecb37a2c938783fb5ce", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -549,7 +549,7 @@ "element_id": "95af4f3feb2d03b2310ce31abc0c435d", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -570,7 +570,7 @@ "element_id": "c146a58289e616dfc7ba0154e66a262b", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -591,7 +591,7 @@ "element_id": "26a20452d058d66ad402559f659cec7c", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -612,7 +612,7 @@ "element_id": "5779b9b7d25794d3b4ed1fe4e61f6617", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": 
"utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -633,7 +633,7 @@ "element_id": "e2faa573314abd00886d18714776ff1e", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -654,7 +654,7 @@ "element_id": "7cd3302c25869c2f5421bee2f41417be", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -675,7 +675,7 @@ "element_id": "3416e531cf0cb72208991b73db6ae3ef", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -696,7 +696,7 @@ "element_id": "ce9fc96e38d94a623bb7ffee822ac214", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -717,7 +717,7 @@ "element_id": "78f4bc5a836981f7b1ccfd9ad718cc72", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -738,7 +738,7 @@ "element_id": "ab9d11a9dd37cfd5e1876f40777a4480", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -759,7 +759,7 @@ 
"element_id": "95af4f3feb2d03b2310ce31abc0c435d", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -780,7 +780,7 @@ "element_id": "8ab8fca37bfc3201f31244df6b7c4d82", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -801,7 +801,7 @@ "element_id": "2b03eaeb94d29bd57f35cc895c8e50c8", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -822,7 +822,7 @@ "element_id": "8674194ed5ca8d731d521f16b602a7ff", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -843,7 +843,7 @@ "element_id": "e56ea100a4185b69f955d45ff914ef57", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -864,7 +864,7 @@ "element_id": "d2020af82c98a5ae355bf22ab3261e6b", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -885,7 +885,7 @@ "element_id": "94385c9d723aa1c5f156fc9fad3ccc88", "metadata": { "data_source": { - 
"date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -906,7 +906,7 @@ "element_id": "afde979c99a73646915fe253c85c5a9c", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -927,7 +927,7 @@ "element_id": "25072141a0ed1c9474256def9a721513", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -948,7 +948,7 @@ "element_id": "95af4f3feb2d03b2310ce31abc0c435d", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -969,7 +969,7 @@ "element_id": "ae86527b9b053129da62dcb5ed3c8aec", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -990,7 +990,7 @@ "element_id": "25e2f1dc031b5421b8a234945098e58b", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -1011,7 +1011,7 @@ "element_id": "3dfc45d3333ae253d78008c8cde2d752", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", 
"record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -1032,7 +1032,7 @@ "element_id": "4e5a8cc0fcd53f25fe8e41091f016f50", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -1053,7 +1053,7 @@ "element_id": "d93c27500d6e6fc5e73a1e35fe0a36ff", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -1074,7 +1074,7 @@ "element_id": "11ebd9f4c9a7cdbac41f8f7399d3950e", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -1095,7 +1095,7 @@ "element_id": "ed6efde0729a7a59cfd24802fa6edb51", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -1116,7 +1116,7 @@ "element_id": "30c61ae1849c6b38dd09c21d3d4f5951", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -1137,7 +1137,7 @@ "element_id": "95af4f3feb2d03b2310ce31abc0c435d", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": 
"utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -1158,7 +1158,7 @@ "element_id": "4547a3f05b931a26f1cfa16dba70e121", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -1179,7 +1179,7 @@ "element_id": "d11a1c04bd3a9891350b4bd94104df58", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -1200,7 +1200,7 @@ "element_id": "8174b87b76dfe8e8ddb31ab83abc6c33", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -1221,7 +1221,7 @@ "element_id": "fcadc00fe663ee0e7818b0ffc5c46948", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -1242,7 +1242,7 @@ "element_id": "0c76bc4e35219e2a31b09428cd47d009", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -1263,7 +1263,7 @@ "element_id": "3c0578f4d944258ffa4ffac7615f1ff9", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -1284,7 
+1284,7 @@ "element_id": "6bb1e757e09d7fa3aba323a375abd047", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -1305,7 +1305,7 @@ "element_id": "9db439c530ed3425c0a68724de199942", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -1326,7 +1326,7 @@ "element_id": "b7948d6976e997e76e343161b4b5d864", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -1347,7 +1347,7 @@ "element_id": "e7ac421147471fe341ae242e7544a44c", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -1368,7 +1368,7 @@ "element_id": "4b48b0469ba9682a3e385ee7fbb6bbed", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -1389,7 +1389,7 @@ "element_id": "5277334fd8abe869f6a8de2e43942c9d", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -1410,7 +1410,7 @@ "element_id": "44f0ab7953bb0b3696b9fa3cf0682f35", "metadata": { 
"data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -1431,7 +1431,7 @@ "element_id": "08e781dd2b6499b1ac8105a47f3520cc", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -1452,7 +1452,7 @@ "element_id": "e586cf66e92b356a4611ee2ffdf85a16", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -1473,7 +1473,7 @@ "element_id": "46c8e0c55b163d73d3d2766be8d1bf8d", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -1494,7 +1494,7 @@ "element_id": "373341046b8ef33d588d6038817b98a0", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -1515,7 +1515,7 @@ "element_id": "eff8c9923ee635be60ad8c6d7e891f42", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -1536,7 +1536,7 @@ "element_id": "f7d988c7d799cc7eec1527f363785a8c", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": 
"2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -1557,7 +1557,7 @@ "element_id": "41d85a7cc007a9c34136a786d6e61c15", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -1578,7 +1578,7 @@ "element_id": "95af4f3feb2d03b2310ce31abc0c435d", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -1599,7 +1599,7 @@ "element_id": "75f95ac86db86ab3315a272430deda68", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -1620,7 +1620,7 @@ "element_id": "2bbe57e6c291db638d3fcddca9e0199a", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -1641,7 +1641,7 @@ "element_id": "a5ef6992dc2c2b5b122b764e5f23d66a", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -1662,7 +1662,7 @@ "element_id": "ddf9839e0b050a2c7b2e3a502e0be91c", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", 
"remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -1683,7 +1683,7 @@ "element_id": "925934587d21dc69b7040e5299c3957e", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -1704,7 +1704,7 @@ "element_id": "396c7bd25bf4d65da962d23adbb8cbe2", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -1725,7 +1725,7 @@ "element_id": "2d14934d52ff357c52e9ae1c38f7390e", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -1746,7 +1746,7 @@ "element_id": "bc6284f4d0f59f3cdadf10b2efc77c18", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -1767,7 +1767,7 @@ "element_id": "d4f397f8b162452852c0a8fb898c5e8a", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -1788,7 +1788,7 @@ "element_id": "828061b3c51e703fa7f8a7d5fc8271b1", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": 
"utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -1809,7 +1809,7 @@ "element_id": "7250b07d7951c2b7b39c79195f4e69e7", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -1830,7 +1830,7 @@ "element_id": "95af4f3feb2d03b2310ce31abc0c435d", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -1851,7 +1851,7 @@ "element_id": "fa5b96150a767439834d76af71b6f7a8", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -1872,7 +1872,7 @@ "element_id": "a81cc4e3ca23fd16254e2b858cdcb00a", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -1893,7 +1893,7 @@ "element_id": "91c5d881804324614a0f24e7d96ad42a", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -1914,7 +1914,7 @@ "element_id": "55fff1c4b3f7a8ff06a6c46d21d66cd1", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -1935,7 
+1935,7 @@ "element_id": "4e5bd723dd23dd1d479acbfcf2e5a9d5", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -1956,7 +1956,7 @@ "element_id": "a4b8951a10e5e3d05b83227fcc4d02d8", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -1977,7 +1977,7 @@ "element_id": "6a18bf042a98d772d5995a6140cc1a9c", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -1998,7 +1998,7 @@ "element_id": "64e79c327a2e74933f25bd3e9caf09f5", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -2019,7 +2019,7 @@ "element_id": "a9811a5a7bebc1f7a97bf6ca7ca5c890", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -2040,7 +2040,7 @@ "element_id": "95af4f3feb2d03b2310ce31abc0c435d", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -2061,7 +2061,7 @@ "element_id": "907d316caf83dc6066a2a29e7a671f7d", "metadata": { 
"data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -2082,7 +2082,7 @@ "element_id": "17e2197f8b893177f51752cab299e36f", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -2103,7 +2103,7 @@ "element_id": "bc6f2a8f19639ee36e3298fa992b76e6", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -2124,7 +2124,7 @@ "element_id": "b411a2a97519dabf36abc764adda53dc", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -2145,7 +2145,7 @@ "element_id": "5dff888d3e9f315d88116aa3660686f9", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -2166,7 +2166,7 @@ "element_id": "3d668f85b6a6bd19156d78c62f740006", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -2187,7 +2187,7 @@ "element_id": "950260886a2d8eb808263bc73ec898f1", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": 
"2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -2208,7 +2208,7 @@ "element_id": "556c04bb61f902927d202956f7d2d6fd", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -2229,7 +2229,7 @@ "element_id": "cbb9553ae9412cc864f9f254b47c3efc", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -2250,7 +2250,7 @@ "element_id": "cd9e31727baaddee4567c7ef27c4937a", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -2271,7 +2271,7 @@ "element_id": "a07efcab5056130c10048443a2bf8a3a", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -2292,7 +2292,7 @@ "element_id": "9875c08b39a4905c52bef432c042c0bb", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -2313,7 +2313,7 @@ "element_id": "d750b11efc2f858c7deadb09e3929e1c", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", 
"remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -2334,7 +2334,7 @@ "element_id": "3e5456c4e156292c6284a528b3d3fb0c", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -2355,7 +2355,7 @@ "element_id": "e118be83abfed92b8969eca98bb4d53b", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -2376,7 +2376,7 @@ "element_id": "6ef230728534d871e5126e2a55e12b26", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -2397,7 +2397,7 @@ "element_id": "6215d8f373972db90d05458d63af9efe", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -2418,7 +2418,7 @@ "element_id": "da431b9817da923cc48a538c4b3b8ade", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -2439,7 +2439,7 @@ "element_id": "d073e054fbe8931eb0e200b268710187", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": 
"utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -2460,7 +2460,7 @@ "element_id": "9aa84a204c906f24862e2e3326cac381", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -2481,7 +2481,7 @@ "element_id": "bab943d841e99d44807adb96ef9ef925", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" @@ -2502,7 +2502,7 @@ "element_id": "0e1a77d4edd2f7419db77d9cdff99551", "metadata": { "data_source": { - "date_modified": "2023-02-14T07:31:28", + "date_modified": "2023-02-14T07:31:28+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" diff --git a/test_unstructured_ingest/expected-structured-output/s3/Silent-Giant-(1).pdf.json b/test_unstructured_ingest/expected-structured-output/s3/Silent-Giant-(1).pdf.json index 558652deae..1ad0028d56 100644 --- a/test_unstructured_ingest/expected-structured-output/s3/Silent-Giant-(1).pdf.json +++ b/test_unstructured_ingest/expected-structured-output/s3/Silent-Giant-(1).pdf.json @@ -3,7 +3,7 @@ "element_id": "80f1cd7f1c8e281093a32842b1e5bbce", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -24,7 +24,7 @@ "element_id": "b0f30096c91b740f061fd09cc6b86f2b", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": 
"utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -45,7 +45,7 @@ "element_id": "c56cacd540b7d88bfe08d824c0ced992", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -66,7 +66,7 @@ "element_id": "14547603bad3329c14c74b8c4e2ff8d9", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -87,7 +87,7 @@ "element_id": "d4d5f1410356bb0053eeff6a9d2e84ae", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -108,7 +108,7 @@ "element_id": "968d079b21338bb15f266e70ed001fed", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -129,7 +129,7 @@ "element_id": "b454d477ce7af47a3cce791045b27fb7", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -150,7 +150,7 @@ "element_id": "b00c97f0af489349e26f90166924530c", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -171,7 +171,7 @@ "element_id": "8ab9e34861e39e7de50648b982afd89e", "metadata": { 
"data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -192,7 +192,7 @@ "element_id": "2ee5188001ff61cf0d0e40659ce7bc49", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -213,7 +213,7 @@ "element_id": "1d53fd350bd8dd3a38db6787b7ef77cf", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -234,7 +234,7 @@ "element_id": "01b622a95350cc48bafc9165732d661a", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -255,7 +255,7 @@ "element_id": "494d55b19020277b68f13daf5464a252", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -276,7 +276,7 @@ "element_id": "d2bff80ca96af626923ef67c2a927f2f", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -297,7 +297,7 @@ "element_id": "93e7dedc9d334470067ad2de1f9ee788", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", 
"remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -318,7 +318,7 @@ "element_id": "0fc5165686190ca845407c03ad4572e8", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -339,7 +339,7 @@ "element_id": "5749fdd6b67e4204b3047ba33540bc87", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -360,7 +360,7 @@ "element_id": "3b5a5220792fcbec0b59d2088bc4c9ab", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -381,7 +381,7 @@ "element_id": "542bf86ba9bab1357e3aaa0b4ae0ff70", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -402,7 +402,7 @@ "element_id": "563a2980d46c81119e1d7d952b375a41", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -423,7 +423,7 @@ "element_id": "b4af08fb653ae7dea99f3a48c2ff7f5d", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -444,7 +444,7 @@ "element_id": 
"d5e389eb1b6b367ac5cf6e12acccfcbc", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -465,7 +465,7 @@ "element_id": "81a83544cf93c245178cbc1620030f11", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -486,7 +486,7 @@ "element_id": "7d12ba56e9f8b3dc64f77c87318c4f37", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -507,7 +507,7 @@ "element_id": "73a2af8864fc500fa49048bf3003776c", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -528,7 +528,7 @@ "element_id": "8e1f192fe25ad49be764c3f55c68beb3", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -549,7 +549,7 @@ "element_id": "df34d853f2f2f1f14b92359f695426dc", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -570,7 +570,7 @@ "element_id": "befa378e171bb64fdf68091abf3501bd", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": 
"2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -591,7 +591,7 @@ "element_id": "2ab7465d2e5fa24b6724907f968cd4aa", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -612,7 +612,7 @@ "element_id": "76b7731e68f6ef2b8958ea4f1ec351af", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -633,7 +633,7 @@ "element_id": "a5d60fc4dbbd484074d8389c35703cf7", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -654,7 +654,7 @@ "element_id": "152a83b89c4b24f7f3db154d0c3ddc1b", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -675,7 +675,7 @@ "element_id": "a7be8e1fe282a37cd666e0632b17d933", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -696,7 +696,7 @@ "element_id": "e78f27ab3ef177a9926e6b90e572b985", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": 
"utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -717,7 +717,7 @@ "element_id": "81a83544cf93c245178cbc1620030f11", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -738,7 +738,7 @@ "element_id": "a20a2b7bb0842d5cf8a0c06c626421fd", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -759,7 +759,7 @@ "element_id": "7d12ba56e9f8b3dc64f77c87318c4f37", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -780,7 +780,7 @@ "element_id": "a85e9db4851f7cd3efb8db7bf69a07cf", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -801,7 +801,7 @@ "element_id": "e8facf920827e7bb6c64b065223f7c1e", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -822,7 +822,7 @@ "element_id": "5f0bfed5240c4c0d50a2b4d2b56d8e6f", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -843,7 +843,7 @@ "element_id": "a58efe5247845dc40b0a648f1761ad5c", "metadata": { 
"data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -864,7 +864,7 @@ "element_id": "e81223a218fb419ef983253212ce7e22", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -885,7 +885,7 @@ "element_id": "70d25f2c1428def16804c3b346ee8d13", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -906,7 +906,7 @@ "element_id": "0b0bd7ca2acebad288fe09c9d9595f1f", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -927,7 +927,7 @@ "element_id": "6db8df46f6adb58be10bf8d88e53d42e", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -948,7 +948,7 @@ "element_id": "d2b22b470eb3ab5829c6e3efb55e49a4", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -969,7 +969,7 @@ "element_id": "fde5c605b758e800b43c8e5844d0eb39", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", 
"remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -990,7 +990,7 @@ "element_id": "c262b08f408059316ea2bfe106d4996b", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -1011,7 +1011,7 @@ "element_id": "45a4d31b0300260dec8f3f86ec2ba0ad", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -1032,7 +1032,7 @@ "element_id": "c550d4a50eaa40717f1d857bce491a81", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -1053,7 +1053,7 @@ "element_id": "e8dbac2cdc67e714e99baa9c0f6a54b9", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -1074,7 +1074,7 @@ "element_id": "2a5e6485f55769e5d4c820cb79f018d7", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -1095,7 +1095,7 @@ "element_id": "c23c8bcefbe3c4d1e5b55df29b717fc1", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -1116,7 +1116,7 @@ "element_id": 
"2e16c4200e350d951a1911e164cc7a7d", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -1137,7 +1137,7 @@ "element_id": "5b702c79a0deb88609b6a9b76a8ff4b1", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -1158,7 +1158,7 @@ "element_id": "34e41476b3562e66cc04edd8a2ba4eb7", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -1179,7 +1179,7 @@ "element_id": "5e1e7b461ccf41232b3daf5b54d59399", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -1200,7 +1200,7 @@ "element_id": "aec400e3e65dc09b31513694bc9893b9", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -1221,7 +1221,7 @@ "element_id": "29399af043bbf069ecfd1abdcaee4b15", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -1242,7 +1242,7 @@ "element_id": "2fb9de7ce072904e3da50fc724ce8b12", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": 
"2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -1263,7 +1263,7 @@ "element_id": "dbae772db29058a88f9bd830e957c695", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -1284,7 +1284,7 @@ "element_id": "2d52b4cb071eb1384e8f64581d907335", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -1305,7 +1305,7 @@ "element_id": "e11247712b3df61756970b45f019ad68", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -1326,7 +1326,7 @@ "element_id": "3f79bb7b435b05321651daefd374cdc6", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -1347,7 +1347,7 @@ "element_id": "f83714d89302473e0e4f5399bd50e7a9", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -1368,7 +1368,7 @@ "element_id": "380918b946a526640a40df5dced65167", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": 
"utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -1389,7 +1389,7 @@ "element_id": "f9bb49945b60897227abdd75b5f8d39b", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -1410,7 +1410,7 @@ "element_id": "1fb2ec4fc8fc547c0de86ba79ba651e5", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -1431,7 +1431,7 @@ "element_id": "c12be3875fcb0e34681e80c68ced624f", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -1452,7 +1452,7 @@ "element_id": "3472694e01617965f38369c536bdc070", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -1473,7 +1473,7 @@ "element_id": "49b5bd9868a164cf35b10cbb343ddba0", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -1494,7 +1494,7 @@ "element_id": "bbf3f11cb5b43e700273a78d12de55e4", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -1515,7 +1515,7 @@ "element_id": "ad57366865126e55649ecb23ae1d4888", 
"metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -1536,7 +1536,7 @@ "element_id": "67ff7489d537e35454934b9dc3a725f9", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -1557,7 +1557,7 @@ "element_id": "5feceb66ffc86f38d952786c6d696c79", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -1578,7 +1578,7 @@ "element_id": "7a1ca4ef7515f7276bae7230545829c2", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -1599,7 +1599,7 @@ "element_id": "e720e656050a5b95706b7ffefb3ff505", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -1620,7 +1620,7 @@ "element_id": "0b7637d5dd3c0e09980fb400ebbdcf72", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -1641,7 +1641,7 @@ "element_id": "f18421197d4b12d362e1ededa3f3145f", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", 
"record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -1662,7 +1662,7 @@ "element_id": "dddeec4eec1ff6db9e832ed00fea1b7e", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -1683,7 +1683,7 @@ "element_id": "563a2980d46c81119e1d7d952b375a41", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -1704,7 +1704,7 @@ "element_id": "3d193aba53b016527c3f658cdd6d99e2", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -1725,7 +1725,7 @@ "element_id": "13d3d626e6be5671aed83c1270851087", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -1746,7 +1746,7 @@ "element_id": "c671a138a73229210d751a2857ff503d", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -1767,7 +1767,7 @@ "element_id": "2f701c7144dd1588cdb70e2a188c1418", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -1788,7 
+1788,7 @@ "element_id": "a0fef2750bd4fbae8e9e28211ceea788", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -1809,7 +1809,7 @@ "element_id": "37eafc6215975a858fa506f14ea98982", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -1830,7 +1830,7 @@ "element_id": "c69b33366ea0bcfb6c30799a4100c6a0", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -1851,7 +1851,7 @@ "element_id": "671a02117af87c2371462f800d856f15", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -1872,7 +1872,7 @@ "element_id": "c948e12a05e40ac00f3e5321f6ae7742", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -1893,7 +1893,7 @@ "element_id": "0748d61bf0b14f9f30e6cfbfb57034d4", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -1914,7 +1914,7 @@ "element_id": "395face9e1f924d1f2a0d746f317c5b9", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", 
+ "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -1935,7 +1935,7 @@ "element_id": "a0d7deccf89e42d02a9d66b0c1889689", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -1956,7 +1956,7 @@ "element_id": "de7d1b721a1e0632b7cf04edf5032c8e", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -1977,7 +1977,7 @@ "element_id": "5d7f49449ab22deac22d767b89549c55", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -1998,7 +1998,7 @@ "element_id": "f5557d4fcf727a981a3c315aca733eef", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -2019,7 +2019,7 @@ "element_id": "0ab306823035661bb8dba21cc2535231", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -2040,7 +2040,7 @@ "element_id": "d3fc2842ddfad4c8d3859f84d4439bfd", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": 
"utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -2061,7 +2061,7 @@ "element_id": "4c94485e0c21ae6c41ce1dfe7b6bface", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -2082,7 +2082,7 @@ "element_id": "c0ff93ea8927a7366db0331e5fd9d19f", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -2103,7 +2103,7 @@ "element_id": "c0ff93ea8927a7366db0331e5fd9d19f", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -2124,7 +2124,7 @@ "element_id": "c1d2906220d1eef1b17422b7132872a8", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -2145,7 +2145,7 @@ "element_id": "de72de35f0092bdd3107011f3be18dc0", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -2166,7 +2166,7 @@ "element_id": "dcf0fcce0dd00a5335e8e42658aacc75", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -2187,7 +2187,7 @@ "element_id": "5897aff759a5cc8d94710101c73af296", 
"metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -2208,7 +2208,7 @@ "element_id": "ed171375d0bf81eaa5512140c3a29b8f", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -2229,7 +2229,7 @@ "element_id": "0b9ea5c0804f5a369317ffcf363badf3", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -2250,7 +2250,7 @@ "element_id": "c2fb0a6722612bb6055e56fea799a81b", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -2271,7 +2271,7 @@ "element_id": "06d4880e4a23a9520618a50bfbbdf940", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -2292,7 +2292,7 @@ "element_id": "8ff63a0f4af4de37eff90952d575f76d", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -2313,7 +2313,7 @@ "element_id": "4ac69378210b56e0c98be2d41e374769", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", 
"record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" @@ -2334,7 +2334,7 @@ "element_id": "3df4f87a759f8e1a2e3ab0a186ac16ef", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:10:36", + "date_modified": "2023-02-12T10:10:36+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/Silent-Giant-(1).pdf" diff --git a/test_unstructured_ingest/expected-structured-output/s3/page-with-formula.pdf.json b/test_unstructured_ingest/expected-structured-output/s3/page-with-formula.pdf.json index a246b0c7d6..c1b2a293b5 100644 --- a/test_unstructured_ingest/expected-structured-output/s3/page-with-formula.pdf.json +++ b/test_unstructured_ingest/expected-structured-output/s3/page-with-formula.pdf.json @@ -3,7 +3,7 @@ "element_id": "1fced17e7fb29d9a55193a3c33b57446", "metadata": { "data_source": { - "date_modified": "2023-10-17T23:20:41", + "date_modified": "2023-10-17T23:20:41+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf" @@ -24,7 +24,7 @@ "element_id": "2034a880526bd0e8273295f7d63a2286", "metadata": { "data_source": { - "date_modified": "2023-10-17T23:20:41", + "date_modified": "2023-10-17T23:20:41+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf" @@ -45,7 +45,7 @@ "element_id": "27b36f031306ed6ef5cf87c24b66bd0c", "metadata": { "data_source": { - "date_modified": "2023-10-17T23:20:41", + "date_modified": "2023-10-17T23:20:41+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf" @@ -66,7 +66,7 @@ "element_id": "b64b0c84c1b06a2d8249079dd71405d8", "metadata": { "data_source": { - "date_modified": "2023-10-17T23:20:41", + "date_modified": "2023-10-17T23:20:41+00:00", "record_locator": { "protocol": "s3", "remote_file_path": 
"utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf" @@ -87,7 +87,7 @@ "element_id": "640df3a8e4d5fae30486497226b5c9b8", "metadata": { "data_source": { - "date_modified": "2023-10-17T23:20:41", + "date_modified": "2023-10-17T23:20:41+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf" @@ -108,7 +108,7 @@ "element_id": "6cae52b99feb7821d42d1e968612c58e", "metadata": { "data_source": { - "date_modified": "2023-10-17T23:20:41", + "date_modified": "2023-10-17T23:20:41+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf" @@ -129,7 +129,7 @@ "element_id": "4750f5c635c72f7add8147f05e46a812", "metadata": { "data_source": { - "date_modified": "2023-10-17T23:20:41", + "date_modified": "2023-10-17T23:20:41+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf" @@ -150,7 +150,7 @@ "element_id": "5c4f25461422e00e53ecfd09bbe78dfa", "metadata": { "data_source": { - "date_modified": "2023-10-17T23:20:41", + "date_modified": "2023-10-17T23:20:41+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf" @@ -171,7 +171,7 @@ "element_id": "e9caf0a0f1f415cf9842ac607eaff0ff", "metadata": { "data_source": { - "date_modified": "2023-10-17T23:20:41", + "date_modified": "2023-10-17T23:20:41+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf" @@ -192,7 +192,7 @@ "element_id": "b59bd0cd0e3c6ed28f3567ec55e14bc7", "metadata": { "data_source": { - "date_modified": "2023-10-17T23:20:41", + "date_modified": "2023-10-17T23:20:41+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf" @@ -213,7 +213,7 @@ "element_id": "9d4113060fbfb7435932cad61b0e922a", 
"metadata": { "data_source": { - "date_modified": "2023-10-17T23:20:41", + "date_modified": "2023-10-17T23:20:41+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf" @@ -234,7 +234,7 @@ "element_id": "582ef9f4f3483f1f73bc5ec175bc8892", "metadata": { "data_source": { - "date_modified": "2023-10-17T23:20:41", + "date_modified": "2023-10-17T23:20:41+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf" @@ -255,7 +255,7 @@ "element_id": "e5c3cb7f77f5a0ce57a7bc1ee967ebb9", "metadata": { "data_source": { - "date_modified": "2023-10-17T23:20:41", + "date_modified": "2023-10-17T23:20:41+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf" @@ -276,7 +276,7 @@ "element_id": "bf361fe9e95971d11badc9dedd8de25d", "metadata": { "data_source": { - "date_modified": "2023-10-17T23:20:41", + "date_modified": "2023-10-17T23:20:41+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf" @@ -297,7 +297,7 @@ "element_id": "bb81a72db74d77059c459701be1193a4", "metadata": { "data_source": { - "date_modified": "2023-10-17T23:20:41", + "date_modified": "2023-10-17T23:20:41+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf" @@ -318,7 +318,7 @@ "element_id": "e43d418c6a8817cfb09fcfd081bfd256", "metadata": { "data_source": { - "date_modified": "2023-10-17T23:20:41", + "date_modified": "2023-10-17T23:20:41+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf" @@ -339,7 +339,7 @@ "element_id": "aec400e3e65dc09b31513694bc9893b9", "metadata": { "data_source": { - "date_modified": "2023-10-17T23:20:41", + "date_modified": "2023-10-17T23:20:41+00:00", "record_locator": 
{ "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/page-with-formula.pdf" diff --git a/test_unstructured_ingest/expected-structured-output/s3/recalibrating-risk-report.pdf.json b/test_unstructured_ingest/expected-structured-output/s3/recalibrating-risk-report.pdf.json index 0fc20e972a..28412e0b82 100644 --- a/test_unstructured_ingest/expected-structured-output/s3/recalibrating-risk-report.pdf.json +++ b/test_unstructured_ingest/expected-structured-output/s3/recalibrating-risk-report.pdf.json @@ -3,7 +3,7 @@ "element_id": "3288e0ea130894600aa48a45aaf12121", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -24,7 +24,7 @@ "element_id": "61a1e4ec9d7a8140a78bbb7450ec65ca", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -45,7 +45,7 @@ "element_id": "ad3db440731a2892ac234c2ee7bb5b04", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -66,7 +66,7 @@ "element_id": "f36c8656d6b853a5728f6f17c29706f3", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -87,7 +87,7 @@ "element_id": "a23e0b3be89e2c3a3088fdbddfa3bcb1", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { 
"protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -108,7 +108,7 @@ "element_id": "d4d5f1410356bb0053eeff6a9d2e84ae", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -129,7 +129,7 @@ "element_id": "19d767c50c1203bd1d3882353b464e73", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -150,7 +150,7 @@ "element_id": "ffcf7b0ae159cc2a1bc0a07e3618454a", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -171,7 +171,7 @@ "element_id": "c25840980b1b27ba0706d4319d6e3aa8", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -192,7 +192,7 @@ "element_id": "0048cc9470dc1736e2fc654d7236a3ee", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -213,7 +213,7 @@ "element_id": "676bbd12ba352a9d057d15cb8c925e36", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": 
"utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -234,7 +234,7 @@ "element_id": "0fc5165686190ca845407c03ad4572e8", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -255,7 +255,7 @@ "element_id": "5749fdd6b67e4204b3047ba33540bc87", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -276,7 +276,7 @@ "element_id": "a6c3efab675abc38935ff01d2d8b8386", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -297,7 +297,7 @@ "element_id": "dbe6820b5750298e87712c37dfe97b7d", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -318,7 +318,7 @@ "element_id": "45e9c81bf6ccdc498a6ac5640d786736", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -339,7 +339,7 @@ "element_id": "92a15f52537ead259f4d9c2da1b22454", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -360,7 +360,7 @@ 
"element_id": "8c4b7bab8fb561388b19a37576e4665f", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -381,7 +381,7 @@ "element_id": "cd51de1780ee75925a66b066fb7e4e01", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -402,7 +402,7 @@ "element_id": "80b622f9209254c3963bf7431da6a13a", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -423,7 +423,7 @@ "element_id": "ae4cbd98081526c1b5f7fbd30f47c869", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -444,7 +444,7 @@ "element_id": "d6c864820d8af0aed00d45c41ca0691e", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -465,7 +465,7 @@ "element_id": "829f6e4c5ed9b4e900ab4a696e46687b", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -486,7 +486,7 @@ "element_id": "062875b861d4d40027ab674686333587", "metadata": { "data_source": { - 
"date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -507,7 +507,7 @@ "element_id": "e11247712b3df61756970b45f019ad68", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -528,7 +528,7 @@ "element_id": "3f79bb7b435b05321651daefd374cdc6", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -549,7 +549,7 @@ "element_id": "f83714d89302473e0e4f5399bd50e7a9", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -570,7 +570,7 @@ "element_id": "f9bb49945b60897227abdd75b5f8d39b", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -591,7 +591,7 @@ "element_id": "1fb2ec4fc8fc547c0de86ba79ba651e5", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -612,7 +612,7 @@ "element_id": "6a3adc54db5128f797d4a12855193373", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", 
"record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -633,7 +633,7 @@ "element_id": "44f0d817d4311d9da996b2cb20dc80c8", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -654,7 +654,7 @@ "element_id": "2c07d964a50db8baf3ed9db257827518", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -675,7 +675,7 @@ "element_id": "47ad83d77e60857060e4435724be6db6", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -696,7 +696,7 @@ "element_id": "e450813fe6430d87c4caa64e4792bc74", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -717,7 +717,7 @@ "element_id": "30844d5faa0b85b758a56d22dd4c5048", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -738,7 +738,7 @@ "element_id": "70d25f2c1428def16804c3b346ee8d13", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": 
"utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -759,7 +759,7 @@ "element_id": "0b0bd7ca2acebad288fe09c9d9595f1f", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -780,7 +780,7 @@ "element_id": "b3dc46f381163a6ba4304765c36bc32e", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -801,7 +801,7 @@ "element_id": "8678fa69494c8706052cd795f4f104b2", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -822,7 +822,7 @@ "element_id": "69172a66b601bc530a3d701869fe70ff", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -843,7 +843,7 @@ "element_id": "38bd099c0174246d8ac9b5316877b4cb", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -864,7 +864,7 @@ "element_id": "006f701bd8073d16266a7877cc66ca8e", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -885,7 +885,7 @@ 
"element_id": "14652e5114533ec4e41bae34a9ae8508", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -906,7 +906,7 @@ "element_id": "de64c96ca0e7c0888fe7ca31fae679bb", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -927,7 +927,7 @@ "element_id": "f20b489f0bdd9d6e52a9260366e15809", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -948,7 +948,7 @@ "element_id": "cac91db6239b98e81af3b7d4657aba8e", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -969,7 +969,7 @@ "element_id": "9480b681b55abea19f25da2d11c5a05f", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -990,7 +990,7 @@ "element_id": "79de44b69099529ba9f79b31427cad59", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -1011,7 +1011,7 @@ "element_id": "e36be3332e741c097797f63a8fd3707d", "metadata": { "data_source": { - 
"date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -1032,7 +1032,7 @@ "element_id": "af07e68b26fd9db43d499e55ca9e018f", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -1053,7 +1053,7 @@ "element_id": "256d31a451bb2c5a0d86a9e80a3ecacd", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -1074,7 +1074,7 @@ "element_id": "aec400e3e65dc09b31513694bc9893b9", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -1095,7 +1095,7 @@ "element_id": "29399af043bbf069ecfd1abdcaee4b15", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -1116,7 +1116,7 @@ "element_id": "091660349a1a512762bb9380e9d14cf5", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -1137,7 +1137,7 @@ "element_id": "b864ba4245ef997f44c68d9d57c1fe29", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": 
"2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -1158,7 +1158,7 @@ "element_id": "f18dedc70c4506433490779a01898ae0", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -1179,7 +1179,7 @@ "element_id": "49cf9714f5fae5188caca69778912b35", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -1200,7 +1200,7 @@ "element_id": "6e611a47ebc4cf4fafb25fa9fd407396", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -1221,7 +1221,7 @@ "element_id": "5b6b71051007872635d4f529e167f04e", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -1242,7 +1242,7 @@ "element_id": "676bbd12ba352a9d057d15cb8c925e36", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -1263,7 +1263,7 @@ "element_id": "c69b33366ea0bcfb6c30799a4100c6a0", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", 
"remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -1284,7 +1284,7 @@ "element_id": "671a02117af87c2371462f800d856f15", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -1305,7 +1305,7 @@ "element_id": "a0d7deccf89e42d02a9d66b0c1889689", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -1326,7 +1326,7 @@ "element_id": "de7d1b721a1e0632b7cf04edf5032c8e", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -1347,7 +1347,7 @@ "element_id": "5389164ad375b2831f97d751f5bdb4e6", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -1368,7 +1368,7 @@ "element_id": "5d7f49449ab22deac22d767b89549c55", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -1389,7 +1389,7 @@ "element_id": "f60b349ce3d55412b0f4e4a6b658c5ca", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": 
"utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -1410,7 +1410,7 @@ "element_id": "c5abfd8856e08e4a6d884b3e2bf860f7", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -1431,7 +1431,7 @@ "element_id": "72e939a5bd164f925c3d0aeb3d9a6af8", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -1452,7 +1452,7 @@ "element_id": "4c94485e0c21ae6c41ce1dfe7b6bface", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -1473,7 +1473,7 @@ "element_id": "add0a2858326fca8ba9f6f4e6bbbdb28", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -1494,7 +1494,7 @@ "element_id": "c0ff93ea8927a7366db0331e5fd9d19f", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -1515,7 +1515,7 @@ "element_id": "3750db60e990408fd944ea48886461b2", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -1536,7 
+1536,7 @@ "element_id": "933612b1c8ac73db347d57c4c1006b30", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -1557,7 +1557,7 @@ "element_id": "a0acb863356551c514d2a230c16499bc", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -1578,7 +1578,7 @@ "element_id": "bbea23b1fb74c1188b13acbf2b55c077", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -1599,7 +1599,7 @@ "element_id": "c0ff9c6926628d5d8e0318ebdf439444", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -1620,7 +1620,7 @@ "element_id": "4fd3863356260022a18a3c6932cbf1bf", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -1641,7 +1641,7 @@ "element_id": "1646468e15668c053f076342c361e1a9", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -1662,7 +1662,7 @@ "element_id": "d36f85976820f1619b8be0e2e65d011d", "metadata": { 
"data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -1683,7 +1683,7 @@ "element_id": "06d4880e4a23a9520618a50bfbbdf940", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -1704,7 +1704,7 @@ "element_id": "8ff63a0f4af4de37eff90952d575f76d", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -1725,7 +1725,7 @@ "element_id": "4ac69378210b56e0c98be2d41e374769", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" @@ -1746,7 +1746,7 @@ "element_id": "e0366cbb8cc558e39e0ef80e08f603c1", "metadata": { "data_source": { - "date_modified": "2023-02-12T10:09:32", + "date_modified": "2023-02-12T10:09:32+00:00", "record_locator": { "protocol": "s3", "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" diff --git a/test_unstructured_ingest/metrics/text-extraction/aggregate-scores-cct.tsv b/test_unstructured_ingest/metrics/text-extraction/aggregate-scores-cct.tsv index 6d98818c49..e81eefc503 100644 --- a/test_unstructured_ingest/metrics/text-extraction/aggregate-scores-cct.tsv +++ b/test_unstructured_ingest/metrics/text-extraction/aggregate-scores-cct.tsv @@ -1,3 +1,3 @@ metric average sample_sd population_sd count -cct-accuracy 0.803 0.248 0.241 16 +cct-accuracy 0.806 0.247 0.24 
16 cct-%missing 0.025 0.033 0.032 16 diff --git a/test_unstructured_ingest/metrics/text-extraction/all-docs-cct.tsv b/test_unstructured_ingest/metrics/text-extraction/all-docs-cct.tsv index 053253fb8d..53059f3062 100644 --- a/test_unstructured_ingest/metrics/text-extraction/all-docs-cct.tsv +++ b/test_unstructured_ingest/metrics/text-extraction/all-docs-cct.tsv @@ -1,13 +1,13 @@ filename doctype connector cct-accuracy cct-%missing fake-text.txt txt Sharepoint 1.0 0.0 -ideas-page.html html Sharepoint 0.929 0.033 +ideas-page.html html Sharepoint 0.93 0.033 stanley-cups.xlsx xlsx Sharepoint 0.778 0.0 Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf pdf azure 0.981 0.007 IRS-form-1987.pdf pdf azure 0.783 0.135 spring-weather.html html azure 0.0 0.018 -example-10k.html html local 0.686 0.037 +example-10k.html html local 0.727 0.037 fake-html-cp1252.html html local 0.659 0.0 -ideas-page.html html local 0.929 0.033 +ideas-page.html html local 0.93 0.033 UDHR_first_article_all.txt txt local-single-file 0.995 0.0 fake-html-cp1252.html html local-single-file-with-encoding 0.659 0.0 layout-parser-paper-with-table.jpg jpg local-single-file-with-pdf-infer-table-structure 0.716 0.032 diff --git a/unstructured/__version__.py b/unstructured/__version__.py index 13856f5f51..452ab079f9 100644 --- a/unstructured/__version__.py +++ b/unstructured/__version__.py @@ -1 +1 @@ -__version__ = "0.11.9-dev4" # pragma: no cover +__version__ = "0.12.0" # pragma: no cover