# Workflow file: 1-test-trtllm2.yml
# Display name of this workflow.
name: "TensorRT-LLM C/C++/CUDA Test Suite POPO"

# Triggers:
#   - pushes to the listed branches and to tags matching "v*"
#   - pull requests targeting "main" that modify one of the listed paths
on:
  push:
    branches:
      - "main"
      - "trtllm/ci"
    tags:
      - "v*"
  pull_request:
    paths:
      # Path filters are matched against full file paths, so a directory
      # needs a trailing "/**" to cover the files inside it.
      - "backends/trtllm/**"
      - "server/**"
      - "proto/**"
      - "router/**"
      - "launcher/**"
      - "Cargo.lock"
      - "rust-toolchain.toml"
      - ".github/workflows/build_trtllm.yaml"
      - ".github/workflows/trtllm_tests.yaml"
    branches:
      - "main"
jobs:
  # Calls the reusable workflow build_trtllm.yaml from this repository,
  # passing it the runner name and the AWS role secret.
  build-and-push:
    permissions:
      contents: write
      packages: write
      id-token: write
    uses: ./.github/workflows/build_trtllm.yaml
    with:
      runs-on: aws-highmemory-64-plus-priv
    secrets:
      AWS_ROLE_GITHUB_TGI_TEST: ${{ secrets.AWS_ROLE_GITHUB_TGI_TEST }}

  # Runs the backend test binary inside the container image reported by
  # the build-and-push job.
  run-tests:
    needs: build-and-push
    concurrency:
      # One group per workflow/job and PR head ref; falls back to the run id
      # when head_ref is empty. A newer run in the same group cancels the
      # one in progress.
      group: ${{ github.workflow }}-${{ github.job }}-trtllm-${{ github.head_ref || github.run_id }}
      cancel-in-progress: true
    runs-on:
      group: aws-highmemory-64-plus-priv
    container:
      # NOTE(review): relies on build_trtllm.yaml declaring a `docker_image`
      # workflow output - confirm in that file.
      image: ${{ needs.build-and-push.outputs.docker_image }}
      credentials:
        username: ${{ secrets.REGISTRY_USERNAME }}
        password: ${{ secrets.REGISTRY_PASSWORD }}
      # Extra `docker create` options: expose all GPUs and use 8g of /dev/shm.
      options: --gpus all --shm-size=8g
    steps:
      - name: Run C++/CUDA tests
        run: /usr/local/tgi/bin/tgi_trtllm_backend_tests