Skip to content

fix: SQL stager and Snowflake uploader #1920

fix: SQL stager and Snowflake uploader

fix: SQL stager and Snowflake uploader #1920

Workflow file for this run

# Workflow metadata: display name, triggers, and concurrency policy.
name: End-to-End Tests

# Triggered by pushes to main, pull requests targeting main or a release/*
# branch, and merge-queue (merge_group) events for main.
on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main
      - 'release/*'
  merge_group:
    branches:
      - main

# Runs are grouped per workflow and ref; a newer run in the same group
# cancels the one still in progress.
concurrency:
  group: "${{ github.workflow }}-${{ github.ref }}"
  cancel-in-progress: true
jobs:
# Invokes the repository's base-cache composite action once per Python
# version in the matrix, passing check-only: "true".
setup:
  runs-on: ubuntu-latest
  strategy:
    matrix:
      python-version:
        - "3.9"
        - "3.10"
  steps:
    - uses: actions/checkout@v4
    - uses: ./.github/actions/base-cache
      with:
        python-version: ${{ matrix.python-version }}
        check-only: "true"
# Checks out the repository and runs the `check-untagged-tests` make target.
check_untagged_tests:
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4
    - name: run check
      run: make check-untagged-tests
# API-backed integration tests, fanned out over the `test` matrix
# (partitioners, chunkers). Each entry selects the matching
# `integration-test-<name>` make target.
api_based_int_test:
  runs-on: ubuntu-latest
  strategy:
    matrix:
      test:
        - partitioners
        - chunkers
  steps:
    - uses: actions/checkout@v4
    - name: Set up Python 3.10
      uses: actions/setup-python@v5
      with:
        python-version: "3.10"
    - name: Run ${{ matrix.test }} integration test
      env:
        UNSTRUCTURED_API_KEY: ${{ secrets.UNS_PAID_API_KEY }}
        UNSTRUCTURED_API_URL: 'https://api.unstructuredapp.io'
      run: |
        make install-base
        make install-client
        make install-test
        make integration-test-${{ matrix.test }}
        make parse-skipped-tests
# Embedder integration tests: installs the base, embedder, and test
# dependencies, then runs the `integration-test-embedders` make target with
# the provider credentials below exported as environment variables.
embedders_int_test:
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4
    - name: Set up Python 3.10
      uses: actions/setup-python@v5
      with:
        python-version: "3.10"
    # This job defines no matrix, so `matrix.test` would render as an empty
    # string in the step name; the suite name is spelled out instead.
    - name: Run embedders integration test
      env:
        OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
        AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
        MXBAI_API_KEY: ${{ secrets.MXBAI_API_KEY }}
        TOGETHERAI_API_KEY: ${{ secrets.TOGETHERAI_API_KEY }}
        VOYAGEAI_API_KEY: ${{ secrets.VOYAGEAI_API_KEY }}
        VERTEXAI_API_KEY: ${{ secrets.VERTEXAI_API_KEY }}
        AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
        AZURE_OPENAI_ENDPOINT: ${{ secrets.AZURE_OPENAI_ENDPOINT }}
      run: |
        make install-base
        make install-all-embedders
        make install-test
        make integration-test-embedders
        make parse-skipped-tests
# Blob-storage connector integration tests. Depends on the `setup` and
# `check_untagged_tests` jobs and runs on Python 3.10 only.
blob_storage_connectors_int_test:
  runs-on: ubuntu-latest-m
  needs:
    - setup
    - check_untagged_tests
  steps:
    - uses: actions/checkout@v4
    # This job defines no matrix, so `matrix.python-version` would render as
    # an empty string; the step name states the pinned version directly.
    - name: Set up Python 3.10
      uses: actions/setup-python@v5
      with:
        python-version: "3.10"
    # NOTE(review): no later step in this job reads the `version` output;
    # confirm whether this step is still needed.
    - name: Get full Python version
      id: full-python-version
      run: echo version=$(python -c "import sys; print('-'.join(str(v) for v in sys.version_info))") >> $GITHUB_OUTPUT
    - name: Setup virtual environment
      uses: ./.github/actions/base-cache
      with:
        python-version: "3.10"
    - name: Set up Docker
      run: |
        sudo make install-docker-compose
        docker compose version
    - name: Run Integration Tests
      env:
        # Databricks
        DATABRICKS_HOST: ${{ secrets.DATABRICKS_HOST }}
        DATABRICKS_CATALOG: ${{ secrets.DATABRICKS_CATALOG }}
        DATABRICKS_CLIENT_ID: ${{ secrets.DATABRICKS_CLIENT_ID }}
        DATABRICKS_CLIENT_SECRET: ${{ secrets.DATABRICKS_CLIENT_SECRET }}
        DATABRICKS_PAT: ${{ secrets.DATABRICKS_PAT }}
        # Onedrive
        MS_CLIENT_CRED: ${{ secrets.MS_CLIENT_CRED }}
        MS_CLIENT_ID: ${{ secrets.MS_CLIENT_ID }}
        MS_TENANT_ID: ${{ secrets.MS_TENANT_ID }}
        MS_USER_EMAIL: ${{ secrets.MS_USER_EMAIL }}
        MS_USER_PNAME: ${{ secrets.MS_USER_PNAME }}
        # S3
        S3_INGEST_TEST_ACCESS_KEY: ${{ secrets.S3_INGEST_TEST_ACCESS_KEY }}
        S3_INGEST_TEST_SECRET_KEY: ${{ secrets.S3_INGEST_TEST_SECRET_KEY }}
      # The virtualenv at .venv is activated before any make target runs.
      run: |
        source .venv/bin/activate
        make install-test
        make integration-test-connectors-blob-storage
        make parse-skipped-tests
# SQL connector integration tests. Depends on the `setup` and
# `check_untagged_tests` jobs and runs on Python 3.10 only.
sql_connectors_int_test:
  runs-on: ubuntu-latest-m
  needs:
    - setup
    - check_untagged_tests
  steps:
    - uses: actions/checkout@v4
    # This job defines no matrix, so `matrix.python-version` would render as
    # an empty string; the step name states the pinned version directly.
    - name: Set up Python 3.10
      uses: actions/setup-python@v5
      with:
        python-version: "3.10"
    # NOTE(review): no later step in this job reads the `version` output;
    # confirm whether this step is still needed.
    - name: Get full Python version
      id: full-python-version
      run: echo version=$(python -c "import sys; print('-'.join(str(v) for v in sys.version_info))") >> $GITHUB_OUTPUT
    - name: Setup virtual environment
      uses: ./.github/actions/base-cache
      with:
        python-version: "3.10"
    - name: Set up Docker
      run: |
        sudo make install-docker-compose
        docker compose version
    - name: Run Integration Tests
      env:
        # The Motherduck and Snowflake credentials below are commented out,
        # so they are not exported to the test run.
        # Motherduck
        # MOTHERDUCK_TOKEN: ${{ secrets.MOTHERDUCK_TOKEN }}
        # Snowflake
        # LOCALSTACK_AUTH_TOKEN: ${{ secrets.LOCALSTACK_AUTH_TOKEN }}
        # Delta Tables
        S3_INGEST_TEST_ACCESS_KEY: ${{ secrets.S3_INGEST_TEST_ACCESS_KEY }}
        S3_INGEST_TEST_SECRET_KEY: ${{ secrets.S3_INGEST_TEST_SECRET_KEY }}
        # Databricks Delta Tables
        DATABRICKS_SERVER_HOSTNAME: ${{ secrets.DATABRICKS_SERVER_HOSTNAME }}
        DATABRICKS_HTTP_PATH: ${{ secrets.DATABRICKS_HTTP_PATH }}
        DATABRICKS_ACCESS_TOKEN: ${{ secrets.DATABRICKS_ACCESS_TOKEN }}
      # The virtualenv at .venv is activated before any make target runs.
      run: |
        source .venv/bin/activate
        make install-test
        make integration-test-connectors-sql
        make parse-skipped-tests
# NoSQL connector integration tests. Depends on the `setup` and
# `check_untagged_tests` jobs and runs on Python 3.10 only.
nosql_connectors_int_test:
  runs-on: ubuntu-latest-m
  needs:
    - setup
    - check_untagged_tests
  steps:
    - uses: actions/checkout@v4
    # This job defines no matrix, so `matrix.python-version` would render as
    # an empty string; the step name states the pinned version directly.
    - name: Set up Python 3.10
      uses: actions/setup-python@v5
      with:
        python-version: "3.10"
    # NOTE(review): no later step in this job reads the `version` output;
    # confirm whether this step is still needed.
    - name: Get full Python version
      id: full-python-version
      run: echo version=$(python -c "import sys; print('-'.join(str(v) for v in sys.version_info))") >> $GITHUB_OUTPUT
    - name: Setup virtual environment
      uses: ./.github/actions/base-cache
      with:
        python-version: "3.10"
    - name: Set up Docker
      run: |
        sudo make install-docker-compose
        docker compose version
    - name: Run Integration Tests
      env:
        # MongoDB
        MONGODB_URI: ${{ secrets.MONGODB_URI }}
        # Note: the variable is MONGODB_DATABASE but the secret it reads is
        # named MONGODB_DATABASE_NAME.
        MONGODB_DATABASE: ${{ secrets.MONGODB_DATABASE_NAME }}
        # Redis
        AZURE_REDIS_INGEST_TEST_PASSWORD: ${{ secrets.AZURE_REDIS_INGEST_TEST_PASSWORD }}
        # Vectara
        VECTARA_OAUTH_CLIENT_ID: ${{ secrets.VECTARA_OAUTH_CLIENT_ID }}
        VECTARA_OAUTH_SECRET: ${{ secrets.VECTARA_OAUTH_SECRET }}
        VECTARA_CUSTOMER_ID: ${{ secrets.VECTARA_CUSTOMER_ID }}
      # The virtualenv at .venv is activated before any make target runs.
      run: |
        source .venv/bin/activate
        make install-test
        make integration-test-connectors-nosql
        make parse-skipped-tests
# Vector-database connector integration tests. Depends on the `setup` and
# `check_untagged_tests` jobs and runs on Python 3.10 only.
vector_db_connectors_int_test:
  runs-on: ubuntu-latest-m
  needs:
    - setup
    - check_untagged_tests
  steps:
    - uses: actions/checkout@v4
    # This job defines no matrix, so `matrix.python-version` would render as
    # an empty string; the step name states the pinned version directly.
    - name: Set up Python 3.10
      uses: actions/setup-python@v5
      with:
        python-version: "3.10"
    # NOTE(review): no later step in this job reads the `version` output;
    # confirm whether this step is still needed.
    - name: Get full Python version
      id: full-python-version
      run: echo version=$(python -c "import sys; print('-'.join(str(v) for v in sys.version_info))") >> $GITHUB_OUTPUT
    - name: Setup virtual environment
      uses: ./.github/actions/base-cache
      with:
        python-version: "3.10"
    - name: Set up Docker
      run: |
        sudo make install-docker-compose
        docker compose version
    - name: Run Integration Tests
      env:
        # AstraDB
        ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_DB_APPLICATION_TOKEN }}
        # Note: the variable is ASTRA_DB_API_ENDPOINT but the secret it reads
        # is named ASTRA_DB_ENDPOINT.
        ASTRA_DB_API_ENDPOINT: ${{ secrets.ASTRA_DB_ENDPOINT }}
        # Azure AI Search
        AZURE_SEARCH_API_KEY: ${{ secrets.AZURE_SEARCH_API_KEY }}
        # LanceDB
        AZURE_DEST_CONNECTION_STR: ${{ secrets.AZURE_DEST_CONNECTION_STR }}
        S3_INGEST_TEST_ACCESS_KEY: ${{ secrets.S3_INGEST_TEST_ACCESS_KEY }}
        S3_INGEST_TEST_SECRET_KEY: ${{ secrets.S3_INGEST_TEST_SECRET_KEY }}
        GCP_INGEST_SERVICE_KEY: ${{ secrets.GCP_INGEST_SERVICE_KEY }}
        # Pinecone
        PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }}
      # The virtualenv at .venv is activated before any make target runs.
      run: |
        source .venv/bin/activate
        make install-test
        make integration-test-connectors-vector-db
        make parse-skipped-tests
# Integration tests for connectors outside the blob-storage, SQL, NoSQL, and
# vector-db groups. Depends on the `setup` and `check_untagged_tests` jobs and
# runs on Python 3.10 only.
uncategorized_connectors_int_test:
  runs-on: ubuntu-latest-m
  needs:
    - setup
    - check_untagged_tests
  steps:
    - uses: actions/checkout@v4
    # This job defines no matrix, so `matrix.python-version` would render as
    # an empty string; the step name states the pinned version directly.
    - name: Set up Python 3.10
      uses: actions/setup-python@v5
      with:
        python-version: "3.10"
    # NOTE(review): no later step in this job reads the `version` output;
    # confirm whether this step is still needed.
    - name: Get full Python version
      id: full-python-version
      run: echo version=$(python -c "import sys; print('-'.join(str(v) for v in sys.version_info))") >> $GITHUB_OUTPUT
    - name: Setup virtual environment
      uses: ./.github/actions/base-cache
      with:
        python-version: "3.10"
    - name: Set up Docker
      run: |
        sudo make install-docker-compose
        docker compose version
    - name: Run Integration Tests
      env:
        # Discord
        DISCORD_TOKEN: ${{ secrets.DISCORD_TOKEN }}
        DISCORD_CHANNELS: ${{ secrets.DISCORD_CHANNELS }}
        # Kafka
        KAFKA_API_KEY: ${{ secrets.KAFKA_API_KEY }}
        KAFKA_SECRET: ${{ secrets.KAFKA_SECRET }}
        KAFKA_BOOTSTRAP_SERVER: ${{ secrets.KAFKA_BOOTSTRAP_SERVER }}
        # Confluence
        CONFLUENCE_USER_EMAIL: ${{ secrets.CONFLUENCE_USER_EMAIL }}
        CONFLUENCE_API_TOKEN: ${{ secrets.CONFLUENCE_API_TOKEN }}
        # Notion
        NOTION_API_KEY: ${{ secrets.NOTION_API_KEY }}
      # The virtualenv at .venv is activated before any make target runs.
      run: |
        source .venv/bin/activate
        make install-test
        make integration-test-connectors-uncategorized
        make parse-skipped-tests
# Source-connector end-to-end tests, run once per Python version in the
# matrix after the `setup` job. The final step executes
# ./test_e2e/test-src.sh inside the .venv virtualenv.
src_e2e_test:
  runs-on: ubuntu-latest-m
  needs:
    - setup
  strategy:
    matrix:
      python-version:
        - "3.9"
        - "3.10"
  steps:
    # actions/checkout MUST come before auth
    - uses: actions/checkout@v4
    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v5
      with:
        python-version: ${{ matrix.python-version }}
    - name: Get full Python version
      id: full-python-version
      run: echo version=$(python -c "import sys; print('-'.join(str(v) for v in sys.version_info))") >> $GITHUB_OUTPUT
    - name: Setup virtual environment
      uses: ./.github/actions/base-cache
      with:
        python-version: ${{ matrix.python-version }}
    - name: Test (end-to-end)
      # Connector credentials come from repository secrets; the last three
      # entries are literal settings rather than secrets.
      env:
        AIRTABLE_PERSONAL_ACCESS_TOKEN: ${{ secrets.AIRTABLE_PERSONAL_ACCESS_TOKEN }}
        BOX_APP_CONFIG: ${{ secrets.BOX_APP_CONFIG }}
        DATABRICKS_HOST: ${{ secrets.DATABRICKS_HOST }}
        DATABRICKS_CATALOG: ${{ secrets.DATABRICKS_CATALOG }}
        DATABRICKS_CLIENT_ID: ${{ secrets.DATABRICKS_CLIENT_ID }}
        DATABRICKS_CLIENT_SECRET: ${{ secrets.DATABRICKS_CLIENT_SECRET }}
        DISCORD_TOKEN: ${{ secrets.DISCORD_TOKEN }}
        DROPBOX_APP_KEY: ${{ secrets.DROPBOX_APP_KEY }}
        DROPBOX_APP_SECRET: ${{ secrets.DROPBOX_APP_SECRET }}
        DROPBOX_REFRESH_TOKEN: ${{ secrets.DROPBOX_REFRESH_TOKEN }}
        GCP_INGEST_SERVICE_KEY: ${{ secrets.GCP_INGEST_SERVICE_KEY }}
        GH_READ_ONLY_ACCESS_TOKEN: ${{ secrets.GH_READ_ONLY_ACCESS_TOKEN }}
        HUBSPOT_API_TOKEN: ${{ secrets.HUBSPOT_API_TOKEN }}
        JIRA_INGEST_API_TOKEN: ${{ secrets.JIRA_INGEST_API_TOKEN }}
        JIRA_INGEST_USER_EMAIL: ${{ secrets.JIRA_INGEST_USER_EMAIL }}
        MS_CLIENT_CRED: ${{ secrets.MS_CLIENT_CRED }}
        MS_CLIENT_ID: ${{ secrets.MS_CLIENT_ID }}
        MS_TENANT_ID: ${{ secrets.MS_TENANT_ID }}
        MS_USER_EMAIL: ${{ secrets.MS_USER_EMAIL }}
        MS_USER_PNAME: ${{ secrets.MS_USER_PNAME }}
        SALESFORCE_USERNAME: ${{ secrets.SALESFORCE_USERNAME }}
        SALESFORCE_CONSUMER_KEY: ${{ secrets.SALESFORCE_CONSUMER_KEY }}
        SALESFORCE_PRIVATE_KEY: ${{ secrets.SALESFORCE_PRIVATE_KEY }}
        SHAREPOINT_CLIENT_ID: ${{ secrets.SHAREPOINT_CLIENT_ID }}
        SHAREPOINT_CRED: ${{ secrets.SHAREPOINT_CRED }}
        SHAREPOINT_SITE: ${{ secrets.SHAREPOINT_SITE }}
        SHAREPOINT_PERMISSIONS_APP_ID: ${{ secrets.SHAREPOINT_PERMISSIONS_APP_ID }}
        SHAREPOINT_PERMISSIONS_APP_CRED: ${{ secrets.SHAREPOINT_PERMISSIONS_APP_CRED }}
        SHAREPOINT_PERMISSIONS_TENANT: ${{ secrets.SHAREPOINT_PERMISSIONS_TENANT }}
        SLACK_TOKEN: ${{ secrets.SLACK_TOKEN }}
        UNS_PAID_API_KEY: ${{ secrets.UNS_PAID_API_KEY }}
        NOTION_API_KEY: ${{ secrets.NOTION_API_KEY }}
        AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
        AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
        AZURE_SEARCH_ENDPOINT: ${{ secrets.AZURE_SEARCH_ENDPOINT }}
        AZURE_SEARCH_API_KEY: ${{ secrets.AZURE_SEARCH_API_KEY }}
        PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }}
        ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_DB_APPLICATION_TOKEN }}
        ASTRA_DB_API_ENDPOINT: ${{ secrets.ASTRA_DB_ENDPOINT }}
        TABLE_OCR: "tesseract"
        OCR_AGENT: "unstructured.partition.utils.ocr_models.tesseract_ocr.OCRAgentTesseract"
        CI: "true"
      run: |
        source .venv/bin/activate
        sudo make install-docker-compose
        docker compose version
        pip freeze
        ./test_e2e/test-src.sh
# Runs ./test_e2e/src/against-api.sh with only the client and base
# dependencies installed. Starts after the `setup` job.
src_api_test:
  runs-on: ubuntu-latest
  needs:
    - setup
  steps:
    # actions/checkout MUST come before auth
    - uses: actions/checkout@v4
    - name: Set up Python 3.10
      uses: actions/setup-python@v5
      with:
        python-version: "3.10"
    - name: Get full Python version
      id: full-python-version
      run: echo version=$(python -c "import sys; print('-'.join(str(v) for v in sys.version_info))") >> $GITHUB_OUTPUT
    - name: Install limited dependencies
      run: |
        make install-client
        make install-base
    - name: Run test against remote API
      env:
        UNS_PAID_API_KEY: ${{ secrets.UNS_PAID_API_KEY }}
      run: ./test_e2e/src/against-api.sh
# Destination-connector end-to-end tests, run once per Python version in the
# matrix after the `setup` job, under the `ci` deployment environment. The
# final step executes ./test_e2e/test-dest.sh inside the .venv virtualenv.
dest_e2e_test:
  environment: ci
  runs-on: ubuntu-latest-m
  needs:
    - setup
  strategy:
    matrix:
      python-version:
        - "3.9"
        - "3.10"
  steps:
    # actions/checkout MUST come before auth
    - uses: actions/checkout@v4
    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v5
      with:
        python-version: ${{ matrix.python-version }}
    - name: Get full Python version
      id: full-python-version
      run: echo version=$(python -c "import sys; print('-'.join(str(v) for v in sys.version_info))") >> $GITHUB_OUTPUT
    - name: Setup virtual environment
      uses: ./.github/actions/base-cache
      with:
        python-version: ${{ matrix.python-version }}
    - name: Test (end-to-end)
      # Connector credentials come from secrets; the last three entries are
      # literal settings rather than secrets.
      env:
        AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
        AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
        S3_INGEST_TEST_ACCESS_KEY: ${{ secrets.S3_INGEST_TEST_ACCESS_KEY }}
        S3_INGEST_TEST_SECRET_KEY: ${{ secrets.S3_INGEST_TEST_SECRET_KEY }}
        AZURE_SEARCH_ENDPOINT: ${{ secrets.AZURE_SEARCH_ENDPOINT }}
        AZURE_SEARCH_API_KEY: ${{ secrets.AZURE_SEARCH_API_KEY }}
        AZURE_REDIS_INGEST_TEST_PASSWORD: ${{ secrets.AZURE_REDIS_INGEST_TEST_PASSWORD }}
        BOX_APP_CONFIG: ${{ secrets.BOX_APP_CONFIG }}
        DROPBOX_APP_KEY: ${{ secrets.DROPBOX_APP_KEY }}
        DROPBOX_APP_SECRET: ${{ secrets.DROPBOX_APP_SECRET }}
        DROPBOX_REFRESH_TOKEN: ${{ secrets.DROPBOX_REFRESH_TOKEN }}
        GCP_INGEST_SERVICE_KEY: ${{ secrets.GCP_INGEST_SERVICE_KEY }}
        OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        AZURE_DEST_CONNECTION_STR: ${{ secrets.AZURE_DEST_CONNECTION_STR }}
        PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }}
        VECTARA_OAUTH_CLIENT_ID: ${{ secrets.VECTARA_OAUTH_CLIENT_ID }}
        VECTARA_OAUTH_SECRET: ${{ secrets.VECTARA_OAUTH_SECRET }}
        VECTARA_CUSTOMER_ID: ${{ secrets.VECTARA_CUSTOMER_ID }}
        ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_DB_APPLICATION_TOKEN }}
        ASTRA_DB_API_ENDPOINT: ${{ secrets.ASTRA_DB_ENDPOINT }}
        CLARIFAI_API_KEY: ${{ secrets.CLARIFAI_API_KEY }}
        SHAREPOINT_CLIENT_ID: ${{ secrets.SHAREPOINT_CLIENT_ID }}
        SHAREPOINT_CRED: ${{ secrets.SHAREPOINT_CRED }}
        KDBAI_BEARER_TOKEN: ${{ secrets.KDBAI_BEARER_TOKEN }}
        KDBAI_USERNAME: ${{ secrets.KDBAI_USERNAME }}
        KDBAI_CIPHER_KEY: ${{ secrets.KDBAI_CIPHER_KEY }}
        UNS_PAID_API_KEY: ${{ secrets.UNS_PAID_API_KEY }}
        TABLE_OCR: "tesseract"
        OCR_AGENT: "unstructured.partition.utils.ocr_models.tesseract_ocr.OCRAgentTesseract"
        CI: "true"
      run: |
        source .venv/bin/activate
        sudo make install-docker-compose
        docker compose version
        pip freeze
        ./test_e2e/test-dest.sh