Skip to content

Commit

Permalink
[batch infer] Update batch inference template to use RayLLMBatch (#346)
Browse files Browse the repository at this point in the history
Update the current batch LLM inference template to use RayLLM-Batch.

---------

Co-authored-by: rickyx <[email protected]>
Co-authored-by: Huaiwei Sun <[email protected]>
  • Loading branch information
3 people authored Oct 22, 2024
1 parent ade2205 commit 7aec451
Show file tree
Hide file tree
Showing 10 changed files with 314 additions and 959 deletions.
86 changes: 3 additions & 83 deletions configs/batch-llm/aws.yaml
Original file line number Diff line number Diff line change
@@ -1,84 +1,4 @@
# NOTE(review): this is the rendered diff hunk for configs/batch-llm/aws.yaml
# (hunk header: @@ -1,84 +1,4 @@), not a standalone YAML document. The +/-
# markers and the original indentation are not present in this rendering, so
# the nesting of the keys below cannot be read off the text; confirm against
# the repository before reusing any of it.
head_node_type:
# Presumably the 83 lines reported as deleted for this file start here (the
# previous head-node settings, worker groups, and AWS tags) — verify.
name: head-node
instance_type: m5.2xlarge
resources:
cpu: 0
# Eight worker groups follow, one per g5 instance size (xlarge through
# 48xlarge). Every group lists the custom resource "accelerator_type:A10G": 1,
# min_workers: 0, use_spot: true and fallback_to_ondemand: true. Groups whose
# name ends in -a10-1 set max_workers: 4; those ending in -a10-4 or -a10-8 set
# max_workers: 1.
worker_node_types:
- name: worker-g5-xlarge-nvidia-a10-1
instance_type: g5.xlarge
resources:
custom_resources:
"accelerator_type:A10G": 1
min_workers: 0
max_workers: 4
use_spot: true
fallback_to_ondemand: true
- name: worker-g5-2xlarge-nvidia-a10-1
instance_type: g5.2xlarge
resources:
custom_resources:
"accelerator_type:A10G": 1
min_workers: 0
max_workers: 4
use_spot: true
fallback_to_ondemand: true
- name: worker-g5-4xlarge-nvidia-a10-1
instance_type: g5.4xlarge
resources:
custom_resources:
"accelerator_type:A10G": 1
min_workers: 0
max_workers: 4
use_spot: true
fallback_to_ondemand: true
- name: worker-g5-8xlarge-nvidia-a10-1
instance_type: g5.8xlarge
resources:
custom_resources:
"accelerator_type:A10G": 1
min_workers: 0
max_workers: 4
use_spot: true
fallback_to_ondemand: true
- name: worker-g5-12xlarge-nvidia-a10-4
instance_type: g5.12xlarge
resources:
custom_resources:
"accelerator_type:A10G": 1
min_workers: 0
max_workers: 1
use_spot: true
fallback_to_ondemand: true
- name: worker-g5-16xlarge-nvidia-a10-1
instance_type: g5.16xlarge
resources:
custom_resources:
"accelerator_type:A10G": 1
min_workers: 0
max_workers: 4
use_spot: true
fallback_to_ondemand: true
- name: worker-g5-24xlarge-nvidia-a10-4
instance_type: g5.24xlarge
resources:
custom_resources:
"accelerator_type:A10G": 1
min_workers: 0
max_workers: 1
use_spot: true
fallback_to_ondemand: true
- name: worker-g5-48xlarge-nvidia-a10-8
instance_type: g5.48xlarge
resources:
custom_resources:
"accelerator_type:A10G": 1
min_workers: 0
max_workers: 1
use_spot: true
fallback_to_ondemand: true
# AWS-specific settings: a single instance tag, as-feature-multi-zone = "true".
aws:
TagSpecifications:
- ResourceType: instance
Tags:
- Key: as-feature-multi-zone
Value: "true"
# Presumably the three lines added by this commit (the new head-node name and
# instance type, plus the TODO explaining the GPU choice) — verify.
name: head
# TODO(ricky): We need head node to have CUDA due to eager import from rayllm_batch now.
instance_type: g5.xlarge
4 changes: 4 additions & 0 deletions configs/batch-llm/gce.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Head node for the batch LLM inference template on GCE.
head_node_type:
  name: head
  # TODO(ricky): The head node currently needs CUDA because of an eager
  # import in rayllm_batch, so it runs on a GPU instance type.
  instance_type: g2-standard-4-nvidia-l4-1
55 changes: 0 additions & 55 deletions configs/batch-llm/gcp.yaml

This file was deleted.

Loading

0 comments on commit 7aec451

Please sign in to comment.