Skip to content

bugfix/add fallback when reading data based on extension (#332) #1868

bugfix/add fallback when reading data based on extension (#332)

bugfix/add fallback when reading data based on extension (#332) #1868

Workflow file for this run

# Workflow: end-to-end and integration test suites.
name: End-to-End Tests

# Triggers: pushes to main, pull requests targeting main or a release/*
# branch, and merge-queue (merge_group) runs.
on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main
      - 'release/*'
  merge_group:
    branches:
      - main

# At most one active run per workflow + ref; a newer run cancels the one
# that is still in progress.
concurrency:
  group: '${{ github.workflow }}-${{ github.ref }}'
  cancel-in-progress: true
jobs:
# Runs the repository-local base-cache action in check-only mode, once per
# Python version in the matrix. Jobs that list `setup` under `needs` wait
# for every matrix leg of this job.
setup:
  runs-on: ubuntu-latest
  strategy:
    matrix:
      python-version:
        - '3.9'
        - '3.10'
  steps:
    # The local action is loaded from the checked-out repository.
    - uses: actions/checkout@v4
    - uses: ./.github/actions/base-cache
      with:
        python-version: ${{ matrix.python-version }}
        # Passed as a string, not a YAML boolean.
        check-only: 'true'
# Integration suites configured with an Unstructured API key and URL; one
# matrix leg per suite (partitioners, chunkers). Does not depend on `setup`.
api_based_integration_test:
  runs-on: ubuntu-latest
  strategy:
    matrix:
      test:
        - partitioners
        - chunkers
  steps:
    - uses: actions/checkout@v4
    - name: Set up Python 3.10
      uses: actions/setup-python@v5
      with:
        python-version: '3.10'
    - name: Run ${{ matrix.test }} integration test
      env:
        UNSTRUCTURED_API_KEY: ${{ secrets.UNS_PAID_API_KEY }}
        UNSTRUCTURED_API_URL: 'https://api.unstructuredapp.io'
      # Dependencies are installed via make targets (no cached virtualenv
      # here); the test target name is built from the matrix entry.
      run: |
        make install-base
        make install-client
        make install-test
        make integration-test-${{ matrix.test }}
        make parse-skipped-tests
# Integration tests for the embedders. Single job (no matrix), Python 3.10,
# dependencies installed through make targets rather than the cached venv.
embedders_integration_test:
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4
    - name: Set up Python 3.10
      uses: actions/setup-python@v5
      with:
        python-version: '3.10'
    # This job defines no strategy.matrix, so the step name is spelled out
    # literally; a `matrix.test` expression would evaluate to an empty
    # string here.
    - name: Run embedders integration test
      env:
        # Provider credentials exposed to the test run.
        OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
        AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
        MXBAI_API_KEY: ${{ secrets.MXBAI_API_KEY }}
        TOGETHERAI_API_KEY: ${{ secrets.TOGETHERAI_API_KEY }}
        VOYAGEAI_API_KEY: ${{ secrets.VOYAGEAI_API_KEY }}
        VERTEXAI_API_KEY: ${{ secrets.VERTEXAI_API_KEY }}
        AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
        AZURE_OPENAI_ENDPOINT: ${{ secrets.AZURE_OPENAI_ENDPOINT }}
      run: |
        make install-base
        make install-all-embedders
        make install-test
        make integration-test-embedders
        make parse-skipped-tests
# Integration tests for the source connectors. Runs once on Python 3.10
# (no matrix) after `setup` has finished, inside the .venv activated in the
# final step.
source_connectors_integration_test:
  runs-on: ubuntu-latest-m
  needs: [setup]
  steps:
    # Checkout comes first: the local base-cache action below is loaded
    # from the checked-out repository.
    - uses: actions/checkout@v4
    # The version is written literally because this job has no
    # strategy.matrix; a `matrix.python-version` expression would evaluate
    # to an empty string in the step name.
    - name: Set up Python 3.10
      uses: actions/setup-python@v5
      with:
        python-version: '3.10'
    # Publishes sys.version_info joined with '-' as the step output
    # `version`.
    - name: Get full Python version
      id: full-python-version
      run: echo version=$(python -c "import sys; print('-'.join(str(v) for v in sys.version_info))") >> $GITHUB_OUTPUT
    - name: Setup virtual environment
      uses: ./.github/actions/base-cache
      with:
        python-version: '3.10'
    - name: Setup up docker
      run: |
        sudo make install-docker-compose
        docker compose version
    - name: Run Integration Tests
      env:
        # Connector credentials exposed to the test run.
        DATABRICKS_HOST: ${{ secrets.DATABRICKS_HOST }}
        DATABRICKS_CATALOG: ${{ secrets.DATABRICKS_CATALOG }}
        DATABRICKS_CLIENT_ID: ${{ secrets.DATABRICKS_CLIENT_ID }}
        DATABRICKS_CLIENT_SECRET: ${{ secrets.DATABRICKS_CLIENT_SECRET }}
        DISCORD_TOKEN: ${{ secrets.DISCORD_TOKEN }}
        DISCORD_CHANNELS: ${{ secrets.DISCORD_CHANNELS }}
        CONFLUENCE_USER_EMAIL: ${{ secrets.CONFLUENCE_USER_EMAIL }}
        CONFLUENCE_API_TOKEN: ${{ secrets.CONFLUENCE_API_TOKEN }}
        ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_DB_APPLICATION_TOKEN }}
        ASTRA_DB_API_ENDPOINT: ${{ secrets.ASTRA_DB_ENDPOINT }}
        MONGODB_URI: ${{ secrets.MONGODB_URI }}
        MONGODB_DATABASE: ${{ secrets.MONGODB_DATABASE_NAME }}
        KAFKA_API_KEY: ${{ secrets.KAFKA_API_KEY }}
        KAFKA_SECRET: ${{ secrets.KAFKA_SECRET }}
        KAFKA_BOOTSTRAP_SERVER: ${{ secrets.KAFKA_BOOTSTRAP_SERVER }}
        DATABRICKS_PAT: ${{ secrets.DATABRICKS_PAT }}
      run: |
        source .venv/bin/activate
        make install-test
        make integration-test-connectors-src
        make parse-skipped-tests
# Integration tests for the destination connectors. Runs once on Python 3.10
# (no matrix) after `setup` has finished, inside the .venv activated in the
# final step.
destination_connectors_integration_test:
  runs-on: ubuntu-latest-m
  needs: [setup]
  steps:
    # Checkout comes first: the local base-cache action below is loaded
    # from the checked-out repository.
    - uses: actions/checkout@v4
    # The version is written literally because this job has no
    # strategy.matrix; a `matrix.python-version` expression would evaluate
    # to an empty string in the step name.
    - name: Set up Python 3.10
      uses: actions/setup-python@v5
      with:
        python-version: '3.10'
    # Publishes sys.version_info joined with '-' as the step output
    # `version`.
    - name: Get full Python version
      id: full-python-version
      run: echo version=$(python -c "import sys; print('-'.join(str(v) for v in sys.version_info))") >> $GITHUB_OUTPUT
    - name: Setup virtual environment
      uses: ./.github/actions/base-cache
      with:
        python-version: '3.10'
    - name: Setup up docker
      run: |
        sudo make install-docker-compose
        docker compose version
    - name: Run Integration Tests
      env:
        # Connector credentials exposed to the test run.
        DATABRICKS_HOST: ${{ secrets.DATABRICKS_HOST }}
        DATABRICKS_CATALOG: ${{ secrets.DATABRICKS_CATALOG }}
        DATABRICKS_CLIENT_ID: ${{ secrets.DATABRICKS_CLIENT_ID }}
        DATABRICKS_CLIENT_SECRET: ${{ secrets.DATABRICKS_CLIENT_SECRET }}
        S3_INGEST_TEST_ACCESS_KEY: ${{ secrets.S3_INGEST_TEST_ACCESS_KEY }}
        S3_INGEST_TEST_SECRET_KEY: ${{ secrets.S3_INGEST_TEST_SECRET_KEY }}
        GCP_INGEST_SERVICE_KEY: ${{ secrets.GCP_INGEST_SERVICE_KEY }}
        AZURE_DEST_CONNECTION_STR: ${{ secrets.AZURE_DEST_CONNECTION_STR }}
        MS_CLIENT_CRED: ${{ secrets.MS_CLIENT_CRED }}
        MS_CLIENT_ID: ${{ secrets.MS_CLIENT_ID }}
        MS_TENANT_ID: ${{ secrets.MS_TENANT_ID }}
        MS_USER_EMAIL: ${{ secrets.MS_USER_EMAIL }}
        MS_USER_PNAME: ${{ secrets.MS_USER_PNAME }}
        PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }}
        ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_DB_APPLICATION_TOKEN }}
        ASTRA_DB_API_ENDPOINT: ${{ secrets.ASTRA_DB_ENDPOINT }}
        AZURE_SEARCH_ENDPOINT: ${{ secrets.AZURE_SEARCH_ENDPOINT }}
        AZURE_SEARCH_API_KEY: ${{ secrets.AZURE_SEARCH_API_KEY }}
        AZURE_REDIS_INGEST_TEST_PASSWORD: ${{ secrets.AZURE_REDIS_INGEST_TEST_PASSWORD }}
        MONGODB_URI: ${{ secrets.MONGODB_URI }}
        MONGODB_DATABASE: ${{ secrets.MONGODB_DATABASE_NAME }}
        QDRANT_API_KEY: ${{ secrets.QDRANT_API_KEY }}
        QDRANT_SERVER_URL: ${{ secrets.QDRANT_SERVER_URL }}
        KAFKA_API_KEY: ${{ secrets.KAFKA_API_KEY }}
        KAFKA_SECRET: ${{ secrets.KAFKA_SECRET }}
        KAFKA_BOOTSTRAP_SERVER: ${{ secrets.KAFKA_BOOTSTRAP_SERVER }}
      run: |
        source .venv/bin/activate
        make install-test
        make integration-test-connectors-dest
        make parse-skipped-tests
# End-to-end run of ./test_e2e/test-src.sh, once per Python version in the
# matrix, after `setup` has finished.
test_src:
  runs-on: ubuntu-latest-m
  needs:
    - setup
  strategy:
    matrix:
      python-version:
        - '3.9'
        - '3.10'
  steps:
    # actions/checkout MUST come before auth (and before the local
    # base-cache action, which is loaded from the checked-out repository).
    - uses: actions/checkout@v4
    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v5
      with:
        python-version: ${{ matrix.python-version }}
    # Publishes sys.version_info joined with '-' as the step output
    # `version`.
    - name: Get full Python version
      id: full-python-version
      run: >-
        echo version=$(python -c "import sys; print('-'.join(str(v) for v in sys.version_info))")
        >> $GITHUB_OUTPUT
    - name: Setup virtual environment
      uses: ./.github/actions/base-cache
      with:
        python-version: ${{ matrix.python-version }}
    - name: Test (end-to-end)
      env:
        # Connector credentials exposed to the test script.
        AIRTABLE_PERSONAL_ACCESS_TOKEN: ${{ secrets.AIRTABLE_PERSONAL_ACCESS_TOKEN }}
        BOX_APP_CONFIG: ${{ secrets.BOX_APP_CONFIG }}
        DATABRICKS_HOST: ${{ secrets.DATABRICKS_HOST }}
        DATABRICKS_CATALOG: ${{ secrets.DATABRICKS_CATALOG }}
        DATABRICKS_CLIENT_ID: ${{ secrets.DATABRICKS_CLIENT_ID }}
        DATABRICKS_CLIENT_SECRET: ${{ secrets.DATABRICKS_CLIENT_SECRET }}
        DISCORD_TOKEN: ${{ secrets.DISCORD_TOKEN }}
        DROPBOX_APP_KEY: ${{ secrets.DROPBOX_APP_KEY }}
        DROPBOX_APP_SECRET: ${{ secrets.DROPBOX_APP_SECRET }}
        DROPBOX_REFRESH_TOKEN: ${{ secrets.DROPBOX_REFRESH_TOKEN }}
        GCP_INGEST_SERVICE_KEY: ${{ secrets.GCP_INGEST_SERVICE_KEY }}
        GH_READ_ONLY_ACCESS_TOKEN: ${{ secrets.GH_READ_ONLY_ACCESS_TOKEN }}
        HUBSPOT_API_TOKEN: ${{ secrets.HUBSPOT_API_TOKEN }}
        JIRA_INGEST_API_TOKEN: ${{ secrets.JIRA_INGEST_API_TOKEN }}
        JIRA_INGEST_USER_EMAIL: ${{ secrets.JIRA_INGEST_USER_EMAIL }}
        MS_CLIENT_CRED: ${{ secrets.MS_CLIENT_CRED }}
        MS_CLIENT_ID: ${{ secrets.MS_CLIENT_ID }}
        MS_TENANT_ID: ${{ secrets.MS_TENANT_ID }}
        MS_USER_EMAIL: ${{ secrets.MS_USER_EMAIL }}
        MS_USER_PNAME: ${{ secrets.MS_USER_PNAME }}
        SALESFORCE_USERNAME: ${{ secrets.SALESFORCE_USERNAME }}
        SALESFORCE_CONSUMER_KEY: ${{ secrets.SALESFORCE_CONSUMER_KEY }}
        SALESFORCE_PRIVATE_KEY: ${{ secrets.SALESFORCE_PRIVATE_KEY }}
        SHAREPOINT_CLIENT_ID: ${{ secrets.SHAREPOINT_CLIENT_ID }}
        SHAREPOINT_CRED: ${{ secrets.SHAREPOINT_CRED }}
        SHAREPOINT_SITE: ${{ secrets.SHAREPOINT_SITE }}
        SHAREPOINT_PERMISSIONS_APP_ID: ${{ secrets.SHAREPOINT_PERMISSIONS_APP_ID }}
        SHAREPOINT_PERMISSIONS_APP_CRED: ${{ secrets.SHAREPOINT_PERMISSIONS_APP_CRED }}
        SHAREPOINT_PERMISSIONS_TENANT: ${{ secrets.SHAREPOINT_PERMISSIONS_TENANT }}
        SLACK_TOKEN: ${{ secrets.SLACK_TOKEN }}
        UNS_PAID_API_KEY: ${{ secrets.UNS_PAID_API_KEY }}
        NOTION_API_KEY: ${{ secrets.NOTION_API_KEY }}
        AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
        AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
        AZURE_SEARCH_ENDPOINT: ${{ secrets.AZURE_SEARCH_ENDPOINT }}
        AZURE_SEARCH_API_KEY: ${{ secrets.AZURE_SEARCH_API_KEY }}
        PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }}
        ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_DB_APPLICATION_TOKEN }}
        ASTRA_DB_API_ENDPOINT: ${{ secrets.ASTRA_DB_ENDPOINT }}
        # Fixed (non-secret) settings; all passed as strings.
        TABLE_OCR: 'tesseract'
        OCR_AGENT: 'unstructured.partition.utils.ocr_models.tesseract_ocr.OCRAgentTesseract'
        CI: 'true'
      # Activates the project virtualenv, installs docker compose, prints
      # the installed packages, then runs the source e2e script.
      run: |
        source .venv/bin/activate
        sudo make install-docker-compose
        docker compose version
        pip freeze
        ./test_e2e/test-src.sh
# Runs ./test_e2e/src/against-api.sh on Python 3.10 after `setup` has
# finished. Only the client and base dependency sets are installed (via
# make); the cached virtualenv is not used here.
test_src_api:
  runs-on: ubuntu-latest-m
  needs:
    - setup
  steps:
    # actions/checkout MUST come before auth
    - uses: actions/checkout@v4
    - name: Set up Python 3.10
      uses: actions/setup-python@v5
      with:
        python-version: '3.10'
    # Publishes sys.version_info joined with '-' as the step output
    # `version`.
    - name: Get full Python version
      id: full-python-version
      run: >-
        echo version=$(python -c "import sys; print('-'.join(str(v) for v in sys.version_info))")
        >> $GITHUB_OUTPUT
    - name: Install limited dependencies
      run: |
        make install-client
        make install-base
    - name: Run test against remote API
      env:
        UNS_PAID_API_KEY: ${{ secrets.UNS_PAID_API_KEY }}
      run: |
        ./test_e2e/src/against-api.sh
# End-to-end run of ./test_e2e/test-dest.sh, once per Python version in the
# matrix, after `setup` has finished. Bound to the `ci` environment.
test_dest:
  environment: ci
  runs-on: ubuntu-latest-m
  needs:
    - setup
  strategy:
    matrix:
      python-version:
        - '3.9'
        - '3.10'
  steps:
    # actions/checkout MUST come before auth (and before the local
    # base-cache action, which is loaded from the checked-out repository).
    - uses: actions/checkout@v4
    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v5
      with:
        python-version: ${{ matrix.python-version }}
    # Publishes sys.version_info joined with '-' as the step output
    # `version`.
    - name: Get full Python version
      id: full-python-version
      run: >-
        echo version=$(python -c "import sys; print('-'.join(str(v) for v in sys.version_info))")
        >> $GITHUB_OUTPUT
    - name: Setup virtual environment
      uses: ./.github/actions/base-cache
      with:
        python-version: ${{ matrix.python-version }}
    - name: Test (end-to-end)
      env:
        # Connector credentials exposed to the test script.
        AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
        AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
        S3_INGEST_TEST_ACCESS_KEY: ${{ secrets.S3_INGEST_TEST_ACCESS_KEY }}
        S3_INGEST_TEST_SECRET_KEY: ${{ secrets.S3_INGEST_TEST_SECRET_KEY }}
        AZURE_SEARCH_ENDPOINT: ${{ secrets.AZURE_SEARCH_ENDPOINT }}
        AZURE_SEARCH_API_KEY: ${{ secrets.AZURE_SEARCH_API_KEY }}
        AZURE_REDIS_INGEST_TEST_PASSWORD: ${{ secrets.AZURE_REDIS_INGEST_TEST_PASSWORD }}
        BOX_APP_CONFIG: ${{ secrets.BOX_APP_CONFIG }}
        DROPBOX_APP_KEY: ${{ secrets.DROPBOX_APP_KEY }}
        DROPBOX_APP_SECRET: ${{ secrets.DROPBOX_APP_SECRET }}
        DROPBOX_REFRESH_TOKEN: ${{ secrets.DROPBOX_REFRESH_TOKEN }}
        GCP_INGEST_SERVICE_KEY: ${{ secrets.GCP_INGEST_SERVICE_KEY }}
        OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        AZURE_DEST_CONNECTION_STR: ${{ secrets.AZURE_DEST_CONNECTION_STR }}
        PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }}
        VECTARA_OAUTH_CLIENT_ID: ${{ secrets.VECTARA_OAUTH_CLIENT_ID }}
        VECTARA_OAUTH_SECRET: ${{ secrets.VECTARA_OAUTH_SECRET }}
        VECTARA_CUSTOMER_ID: ${{ secrets.VECTARA_CUSTOMER_ID }}
        ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_DB_APPLICATION_TOKEN }}
        ASTRA_DB_API_ENDPOINT: ${{ secrets.ASTRA_DB_ENDPOINT }}
        CLARIFAI_API_KEY: ${{ secrets.CLARIFAI_API_KEY }}
        SHAREPOINT_CLIENT_ID: ${{ secrets.SHAREPOINT_CLIENT_ID }}
        SHAREPOINT_CRED: ${{ secrets.SHAREPOINT_CRED }}
        KDBAI_BEARER_TOKEN: ${{ secrets.KDBAI_BEARER_TOKEN }}
        KDBAI_USERNAME: ${{ secrets.KDBAI_USERNAME }}
        KDBAI_CIPHER_KEY: ${{ secrets.KDBAI_CIPHER_KEY }}
        UNS_PAID_API_KEY: ${{ secrets.UNS_PAID_API_KEY }}
        # Fixed (non-secret) settings; all passed as strings.
        TABLE_OCR: 'tesseract'
        OCR_AGENT: 'unstructured.partition.utils.ocr_models.tesseract_ocr.OCRAgentTesseract'
        CI: 'true'
      # Activates the project virtualenv, installs docker compose, prints
      # the installed packages, then runs the destination e2e script.
      run: |
        source .venv/bin/activate
        sudo make install-docker-compose
        docker compose version
        pip freeze
        ./test_e2e/test-dest.sh