feat: Add MarkdownToTextDocument
(v2)
#427
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# If you change this name also do it in tests_preview_skipper.yml | |
name: Tests (Preview) | |
on: | |
workflow_dispatch: # Activate this workflow manually | |
push: | |
branches: | |
- main | |
# release branches have the form v1.9.x | |
- "v[0-9].*[0-9].x" | |
pull_request: | |
types: | |
- opened | |
- reopened | |
- synchronize | |
- ready_for_review | |
paths: | |
- "haystack/preview/**/*.py" | |
- "test/preview/**/*.py" | |
env: | |
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} | |
COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }} | |
CORE_AZURE_CS_ENDPOINT: ${{ secrets.CORE_AZURE_CS_ENDPOINT }} | |
CORE_AZURE_CS_API_KEY: ${{ secrets.CORE_AZURE_CS_API_KEY }} | |
PYTHON_VERSION: "3.8" | |
jobs: | |
black: | |
runs-on: ubuntu-latest | |
steps: | |
- uses: actions/checkout@v4 | |
- uses: actions/setup-python@v4 | |
with: | |
python-version: ${{ env.PYTHON_VERSION }} | |
- name: Install Black | |
run: | | |
pip install --upgrade pip | |
pip install .[formatting] | |
- name: Check status | |
run: | | |
if ! black . --check; then | |
git status | |
echo "###################################################################################################" | |
echo "# " | |
echo "# CHECK FAILED! Black found issues with your code formatting." | |
echo "# " | |
echo "# Either:" | |
echo "# 1. Run Black locally before committing:" | |
echo "# " | |
echo "# pip install .[formatting]" | |
echo "# black ." | |
echo "# " | |
echo "# 2. Install the pre-commit hook:" | |
echo "# " | |
echo "# pre-commit install" | |
echo "# " | |
echo "# 3. See https://github.com/deepset-ai/haystack/blob/main/CONTRIBUTING.md for help." | |
echo "# " | |
echo "# If you have further problems, please open an issue: https://github.com/deepset-ai/haystack/issues" | |
echo "# " | |
echo "##################################################################################################" | |
exit 1 | |
fi | |
- name: Calculate alert data | |
id: calculator | |
shell: bash | |
if: (success() || failure()) && github.ref_name == 'main' | |
run: | | |
if [ "${{ job.status }}" = "success" ]; then | |
echo "alert_type=success" >> "$GITHUB_OUTPUT"; | |
else | |
echo "alert_type=error" >> "$GITHUB_OUTPUT"; | |
fi | |
- name: Send event to Datadog | |
if: (success() || failure()) && github.ref_name == 'main' | |
uses: masci/datadog@v1 | |
with: | |
api-key: ${{ secrets.CORE_DATADOG_API_KEY }} | |
api-url: https://api.datadoghq.eu | |
events: | | |
- title: "${{ github.workflow }} workflow" | |
text: "Job ${{ github.job }} in branch ${{ github.ref_name }}" | |
alert_type: "${{ steps.calculator.outputs.alert_type }}" | |
source_type_name: "Github" | |
host: ${{ github.repository_owner }} | |
tags: | |
- "project:${{ github.repository }}" | |
- "job:${{ github.job }}" | |
- "run_id:${{ github.run_id }}" | |
- "workflow:${{ github.workflow }}" | |
- "branch:${{ github.ref_name }}" | |
- "url:https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}" | |
unit-tests: | |
name: Unit / ${{ matrix.os }} | |
needs: black | |
strategy: | |
fail-fast: false | |
matrix: | |
os: | |
- ubuntu-latest | |
- windows-latest | |
- macos-latest | |
runs-on: ${{ matrix.os }} | |
steps: | |
- uses: actions/checkout@v4 | |
- uses: actions/setup-python@v4 | |
with: | |
python-version: ${{ env.PYTHON_VERSION }} | |
- name: Install Haystack | |
run: pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.32.1 'sentence-transformers>=2.2.0' pypdf openai-whisper tika 'azure-ai-formrecognizer>=3.2.0b2' beautifulsoup4 markdown python-frontmatter | |
- name: Run | |
run: pytest -m "unit" test/preview | |
- name: Calculate alert data | |
id: calculator | |
shell: bash | |
if: (success() || failure()) && github.ref_name == 'main' | |
run: | | |
if [ "${{ job.status }}" = "success" ]; then | |
echo "alert_type=success" >> "$GITHUB_OUTPUT"; | |
else | |
echo "alert_type=error" >> "$GITHUB_OUTPUT"; | |
fi | |
- name: Send event to Datadog | |
if: (success() || failure()) && github.ref_name == 'main' | |
uses: masci/datadog@v1 | |
with: | |
api-key: ${{ secrets.CORE_DATADOG_API_KEY }} | |
api-url: https://api.datadoghq.eu | |
events: | | |
- title: "${{ github.workflow }} workflow" | |
text: "Job ${{ github.job }} in branch ${{ github.ref_name }}" | |
alert_type: "${{ steps.calculator.outputs.alert_type }}" | |
source_type_name: "Github" | |
host: ${{ github.repository_owner }} | |
tags: | |
- "project:${{ github.repository }}" | |
- "job:${{ github.job }}" | |
- "run_id:${{ github.run_id }}" | |
- "workflow:${{ github.workflow }}" | |
- "branch:${{ github.ref_name }}" | |
- "url:https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}" | |
integration-tests-linux: | |
name: Integration / ubuntu-latest | |
needs: unit-tests | |
runs-on: ubuntu-latest | |
services: | |
tika: | |
image: apache/tika:2.9.0.0 | |
ports: | |
- 9998:9998 | |
steps: | |
- uses: actions/checkout@v4 | |
- uses: actions/setup-python@v4 | |
with: | |
python-version: ${{ env.PYTHON_VERSION }} | |
- name: Install dependencies | |
run: | | |
sudo apt update | |
sudo apt install ffmpeg # for local Whisper tests | |
- name: Install Haystack | |
run: pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.32.1 'sentence-transformers>=2.2.0' pypdf openai-whisper tika 'azure-ai-formrecognizer>=3.2.0b2' beautifulsoup4 markdown python-frontmatter | |
- name: Run | |
run: pytest --maxfail=5 -m "integration" test/preview | |
- name: Calculate alert data | |
id: calculator | |
shell: bash | |
if: (success() || failure()) && github.ref_name == 'main' | |
run: | | |
if [ "${{ job.status }}" = "success" ]; then | |
echo "alert_type=success" >> "$GITHUB_OUTPUT"; | |
else | |
echo "alert_type=error" >> "$GITHUB_OUTPUT"; | |
fi | |
- name: Send event to Datadog | |
if: (success() || failure()) && github.ref_name == 'main' | |
uses: masci/datadog@v1 | |
with: | |
api-key: ${{ secrets.CORE_DATADOG_API_KEY }} | |
api-url: https://api.datadoghq.eu | |
events: | | |
- title: "${{ github.workflow }} workflow" | |
text: "Job ${{ github.job }} in branch ${{ github.ref_name }}" | |
alert_type: "${{ steps.calculator.outputs.alert_type }}" | |
source_type_name: "Github" | |
host: ${{ github.repository_owner }} | |
tags: | |
- "project:${{ github.repository }}" | |
- "job:${{ github.job }}" | |
- "run_id:${{ github.run_id }}" | |
- "workflow:${{ github.workflow }}" | |
- "branch:${{ github.ref_name }}" | |
- "url:https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}" | |
integration-tests-macos: | |
name: Integration / macos-latest | |
needs: unit-tests | |
runs-on: macos-latest | |
steps: | |
- uses: actions/checkout@v4 | |
- uses: actions/setup-python@v4 | |
with: | |
python-version: ${{ env.PYTHON_VERSION }} | |
- name: Install dependencies | |
run: | | |
brew install ffmpeg # for local Whisper tests | |
brew install docker | |
colima start | |
- name: Install Haystack | |
run: pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.32.1 'sentence-transformers>=2.2.0' pypdf openai-whisper tika 'azure-ai-formrecognizer>=3.2.0b2' beautifulsoup4 markdown python-frontmatter | |
- name: Run Tika | |
run: docker run -d -p 9998:9998 apache/tika:2.9.0.0 | |
- name: Run | |
run: pytest --maxfail=5 -m "integration" test/preview | |
- name: Calculate alert data | |
id: calculator | |
shell: bash | |
if: (success() || failure()) && github.ref_name == 'main' | |
run: | | |
if [ "${{ job.status }}" = "success" ]; then | |
echo "alert_type=success" >> "$GITHUB_OUTPUT"; | |
else | |
echo "alert_type=error" >> "$GITHUB_OUTPUT"; | |
fi | |
- name: Send event to Datadog | |
if: (success() || failure()) && github.ref_name == 'main' | |
uses: masci/datadog@v1 | |
with: | |
api-key: ${{ secrets.CORE_DATADOG_API_KEY }} | |
api-url: https://api.datadoghq.eu | |
events: | | |
- title: "${{ github.workflow }} workflow" | |
text: "Job ${{ github.job }} in branch ${{ github.ref_name }}" | |
alert_type: "${{ steps.calculator.outputs.alert_type }}" | |
source_type_name: "Github" | |
host: ${{ github.repository_owner }} | |
tags: | |
- "project:${{ github.repository }}" | |
- "job:${{ github.job }}" | |
- "run_id:${{ github.run_id }}" | |
- "workflow:${{ github.workflow }}" | |
- "branch:${{ github.ref_name }}" | |
- "url:https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}" | |
integration-tests-windows: | |
name: Integration / windows-latest | |
needs: unit-tests | |
runs-on: windows-latest | |
steps: | |
- uses: actions/checkout@v4 | |
- uses: actions/setup-python@v4 | |
with: | |
python-version: ${{ env.PYTHON_VERSION }} | |
- name: Install Haystack | |
run: pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.32.1 'sentence-transformers>=2.2.0' pypdf openai-whisper tika 'azure-ai-formrecognizer>=3.2.0b2' beautifulsoup4 markdown python-frontmatter | |
- name: Run | |
run: pytest --maxfail=5 -m "integration" test/preview -k 'not tika' | |
- name: Calculate alert data | |
id: calculator | |
shell: bash | |
if: (success() || failure()) && github.ref_name == 'main' | |
run: | | |
if [ "${{ job.status }}" = "success" ]; then | |
echo "alert_type=success" >> "$GITHUB_OUTPUT"; | |
else | |
echo "alert_type=error" >> "$GITHUB_OUTPUT"; | |
fi | |
- name: Send event to Datadog | |
if: (success() || failure()) && github.ref_name == 'main' | |
uses: masci/datadog@v1 | |
with: | |
api-key: ${{ secrets.CORE_DATADOG_API_KEY }} | |
api-url: https://api.datadoghq.eu | |
events: | | |
- title: "${{ github.workflow }} workflow" | |
text: "Job ${{ github.job }} in branch ${{ github.ref_name }}" | |
alert_type: "${{ steps.calculator.outputs.alert_type }}" | |
source_type_name: "Github" | |
host: ${{ github.repository_owner }} | |
tags: | |
- "project:${{ github.repository }}" | |
- "job:${{ github.job }}" | |
- "run_id:${{ github.run_id }}" | |
- "workflow:${{ github.workflow }}" | |
- "branch:${{ github.ref_name }}" | |
- "url:https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}" |