Skip to content

Commit

Permalink
Merge pull request #119 from GATEOverflow/mlperf-inference
Browse files Browse the repository at this point in the history
Merge from go
  • Loading branch information
arjunsuresh authored Jul 17, 2024
2 parents 8a8f6bb + ee81196 commit 92abd99
Show file tree
Hide file tree
Showing 4 changed files with 603 additions and 18 deletions.
146 changes: 129 additions & 17 deletions script/get-preprocessed-dataset-criteo/_cm.json
Original file line number Diff line number Diff line change
Expand Up @@ -31,37 +31,87 @@
"names": [
"dlrm-src"
],
"tags": "get,dlrm,src"
"tags": "get,dlrm,src",
"skip_if_env": {
"CM_DATASET_PREPROCESSED_PATH": [
"on"
]
}
},
{
"names": [
"inference-src"
],
"tags": "mlperf,mlcommons,inference,source,src"
"tags": "mlperf,mlcommons,inference,source,src",
"skip_if_env": {
"CM_DATASET_PREPROCESSED_PATH": [
"on"
]
}
},
{
"tags": "get,generic-python-lib,_scikit-learn"
"tags": "get,generic-python-lib,_scikit-learn",
"skip_if_env": {
"CM_DATASET_PREPROCESSED_PATH": [
"on"
]
}
},
{
"tags": "get,generic-python-lib,_torch"
"tags": "get,generic-python-lib,_torch",
"skip_if_env": {
"CM_DATASET_PREPROCESSED_PATH": [
"on"
]
}
},
{
"tags": "get,generic-python-lib,_opencv-python"
"tags": "get,generic-python-lib,_opencv-python",
"skip_if_env": {
"CM_DATASET_PREPROCESSED_PATH": [
"on"
]
}
},
{
"tags": "get,generic-python-lib,_decorator"
"tags": "get,generic-python-lib,_decorator",
"skip_if_env": {
"CM_DATASET_PREPROCESSED_PATH": [
"on"
]
}
},
{
"tags": "get,generic-python-lib,_psutil"
"tags": "get,generic-python-lib,_psutil",
"skip_if_env": {
"CM_DATASET_PREPROCESSED_PATH": [
"on"
]
}
},
{
"tags": "get,generic-python-lib,_onnx"
"tags": "get,generic-python-lib,_onnx",
"skip_if_env": {
"CM_DATASET_PREPROCESSED_PATH": [
"on"
]
}
},
{
"tags": "get,generic-python-lib,_tqdm"
"tags": "get,generic-python-lib,_tqdm",
"skip_if_env": {
"CM_DATASET_PREPROCESSED_PATH": [
"on"
]
}
},
{
"tags": "get,generic-python-lib,_mlperf_logging"
"tags": "get,generic-python-lib,_mlperf_logging",
"skip_if_env": {
"CM_DATASET_PREPROCESSED_PATH": [
"on"
]
}
}
],
"input_mapping": {
Expand All @@ -70,7 +120,8 @@
"threads": "CM_NUM_PREPROCESS_THREADS"
},
"new_env_keys": [
"CM_DATASET_*"
"CM_DATASET_*",
"CM_CRITEO_PREPROCESSED_PATH"
],
"tags": [
"get",
Expand Down Expand Up @@ -118,6 +169,37 @@
"CM_CRITEO_FAKE": "yes"
}
},
"mlc": {
"group": "src",
"env": {
"CM_DATASET_PREPROCESSED_CRITEO_FROM_MLC": "yes",
"CM_DATASET_PREPROCESSED_PATH": "on"
}
},
"multihot,mlc": {
"env": {
"CM_RCLONE_CONFIG_CMD": "rclone config create mlc-inference s3 provider=Cloudflare access_key_id=f65ba5eef400db161ea49967de89f47b secret_access_key=fbea333914c292b854f14d3fe232bad6c5407bf0ab1bebf78833c2b359bdfd2b endpoint=https://c2686074cb2caf5cbaf6d134bdba8b47.r2.cloudflarestorage.com",
"CM_RCLONE_URL": "mlc-inference:mlcommons-inference-wg-public/dlrm_preprocessed",
"CM_DOWNLOAD_FINAL_ENV_NAME": "CM_DATASET_PREPROCESSED_PATH",
"CM_EXTRACT_FINAL_ENV_NAME": "CM_DATASET_PREPROCESSED_PATH",
"CM_EXTRACT_TO_FOLDER": "criteo-preprocessed"
},
"deps": [
{
"tags": "download-and-extract,_rclone",
"update_tags_from_env_with_prefix": {
"_url.": [
"CM_RCLONE_URL"
]
},
"force_cache": true,
"names": [
"dae"
],
"extra_cache_tags": "criteo,preprocessed,dataset"
}
]
},
"multihot": {
"group": "type",
"default": true,
Expand All @@ -130,22 +212,52 @@
"names": [
"mlperf-training",
"training-src"
]
],
"skip_if_env": {
"CM_DATASET_PREPROCESSED_CRITEO_FROM_MLC": [
"yes"
]
}
},
{
"tags": "get,generic-python-lib,_package.typing_inspect"
"tags": "get,generic-python-lib,_package.typing_inspect",
"skip_if_env": {
"CM_DATASET_PREPROCESSED_CRITEO_FROM_MLC": [
"yes"
]
}
},
{
"tags": "get,generic-python-lib,_package.iopath"
"tags": "get,generic-python-lib,_package.iopath",
"skip_if_env": {
"CM_DATASET_PREPROCESSED_CRITEO_FROM_MLC": [
"yes"
]
}
},
{
"tags": "get,generic-python-lib,_package.fbgemm_gpu"
"tags": "get,generic-python-lib,_package.fbgemm_gpu",
"skip_if_env": {
"CM_DATASET_PREPROCESSED_CRITEO_FROM_MLC": [
"yes"
]
}
},
{
"tags": "get,generic-python-lib,_package.torchrec"
"tags": "get,generic-python-lib,_package.torchrec",
"skip_if_env": {
"CM_DATASET_PREPROCESSED_CRITEO_FROM_MLC": [
"yes"
]
}
},
{
"tags": "get,generic-python-lib,_package.pyre_extensions"
"tags": "get,generic-python-lib,_package.pyre_extensions",
"skip_if_env": {
"CM_DATASET_PREPROCESSED_CRITEO_FROM_MLC": [
"yes"
]
}
}
]
}
Expand Down
9 changes: 9 additions & 0 deletions script/get-preprocessed-dataset-criteo/customize.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,12 @@ def preprocess(i):
print("Using MLCommons Training source from '" + env['CM_MLPERF_TRAINING_SOURCE'] +"'")

return {'return': 0}

def postprocess(i):
    """Publish the preprocessed Criteo dataset location after the script runs.

    Copies ``CM_DATASET_PREPROCESSED_PATH`` from the script environment into
    ``CM_CRITEO_PREPROCESSED_PATH`` (the key exported via ``new_env_keys`` in
    ``_cm.json``) and into ``CM_GET_DEPENDENT_CACHED_PATH`` so dependent CM
    scripts can locate the cached dataset.

    :param i: CM automation input dict; only ``i['env']`` is used.
    :returns: ``{'return': 0}`` on success (CM success convention).
    :raises KeyError: if ``CM_DATASET_PREPROCESSED_PATH`` is absent —
        deliberate fail-fast, since downstream scripts need the path.
    """
    env = i['env']

    # One source of truth: fan the preprocessed path out to both exported keys.
    preprocessed_path = env['CM_DATASET_PREPROCESSED_PATH']
    env['CM_CRITEO_PREPROCESSED_PATH'] = preprocessed_path
    env['CM_GET_DEPENDENT_CACHED_PATH'] = preprocessed_path

    return {'return': 0}
110 changes: 109 additions & 1 deletion script/run-vllm-server/_cm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,124 @@ input_mapping:
pp_size: CM_VLLM_SERVER_PP_SIZE
distributed-executor-backend: CM_VLLM_SERVER_DIST_EXEC_BACKEND
api_key: CM_VLLM_SERVER_API_KEY
skip_docker_model_download: CM_VLLM_SKIP_DOCKER_MODEL_DOWNLOAD
host: CM_VLLM_SERVER_HOST
port: CM_VLLM_SERVER_PORT
uvicorn_log_level: CM_VLLM_SERVER_UVICORN_LOG_LEVEL
allow_credentials: CM_VLLM_SERVER_ALLOW_CREDENTIALS
allowed_origins: CM_VLLM_SERVER_ALLOWED_ORIGINS
allowed_methods: CM_VLLM_SERVER_ALLOWED_METHODS
allowed_headers: CM_VLLM_SERVER_ALLOWED_HEADERS
lora_modules: CM_VLLM_SERVER_LORA_MODULES
prompt_adapters: CM_VLLM_SERVER_PROMPT_ADAPTERS
chat_template: CM_VLLM_SERVER_CHAT_TEMPLATE
response_role: CM_VLLM_SERVER_RESPONSE_ROLE
ssl_keyfile: CM_VLLM_SERVER_SSL_KEYFILE
ssl_certfile: CM_VLLM_SERVER_SSL_CERTFILE
ssl_ca_certs: CM_VLLM_SERVER_SSL_CA_CERTS
ssl_cert_reqs: CM_VLLM_SERVER_SSL_CERT_REQS
root_path: CM_VLLM_SERVER_ROOT_PATH
middleware: CM_VLLM_SERVER_MIDDLEWARE
tokenizer: CM_VLLM_SERVER_TOKENIZER
skip_tokenizer_init: CM_VLLM_SERVER_SKIP_TOKENIZER_INIT
revision: CM_VLLM_SERVER_REVISION
code_revision: CM_VLLM_SERVER_CODE_REVISION
tokenizer_revision: CM_VLLM_SERVER_TOKENIZER_REVISION
tokenizer_mode: CM_VLLM_SERVER_TOKENIZER_MODE
trust_remote_code: CM_VLLM_SERVER_TRUST_REMOTE_CODE
download_dir: CM_VLLM_SERVER_DOWNLOAD_DIR
load_format: CM_VLLM_SERVER_LOAD_FORMAT
dtype: CM_VLLM_SERVER_DTYPE
kv_cache_dtype: CM_VLLM_SERVER_KV_CACHE_DTYPE
quantization_param_path: CM_VLLM_SERVER_QUANTIZATION_PARAM_PATH
max_model_len: CM_VLLM_SERVER_MAX_MODEL_LEN
guided_decoding_backend: CM_VLLM_SERVER_GUIDED_DECODING_BACKEND
worker_use_ray: CM_VLLM_SERVER_WORKER_USE_RAY
pipeline_parallel_size: CM_VLLM_SERVER_PIPELINE_PARALLEL_SIZE
max_parallel_loading_workers: CM_VLLM_SERVER_MAX_PARALLEL_LOADING_WORKERS
ray_workers_use_nsight: CM_VLLM_SERVER_RAY_WORKERS_USE_NSIGHT
block_size: CM_VLLM_SERVER_BLOCK_SIZE
enable_prefix_caching: CM_VLLM_SERVER_ENABLE_PREFIX_CACHING
disable_sliding_window: CM_VLLM_SERVER_DISABLE_SLIDING_WINDOW
use_v2_block_manager: CM_VLLM_SERVER_USE_V2_BLOCK_MANAGER
num_lookahead_slots: CM_VLLM_SERVER_NUM_LOOKAHEAD_SLOTS
seed: CM_VLLM_SERVER_SEED
swap_space: CM_VLLM_SERVER_SWAP_SPACE
gpu_memory_utilization: CM_VLLM_SERVER_GPU_MEMORY_UTILIZATION
num_gpu_blocks_override: CM_VLLM_SERVER_NUM_GPU_BLOCKS_OVERRIDE
max_num_batched_tokens: CM_VLLM_SERVER_MAX_NUM_BATCHED_TOKENS
max_num_seqs: CM_VLLM_SERVER_MAX_NUM_SEQS
max_logprobs: CM_VLLM_SERVER_MAX_LOGPROBS
disable_log_stats: CM_VLLM_SERVER_DISABLE_LOG_STATS
quantization: CM_VLLM_SERVER_QUANTIZATION
rope_scaling: CM_VLLM_SERVER_ROPE_SCALING
rope_theta: CM_VLLM_SERVER_ROPE_THETA
enforce_eager: CM_VLLM_SERVER_ENFORCE_EAGER
max_context_len_to_capture: CM_VLLM_SERVER_MAX_CONTEXT_LEN_TO_CAPTURE
max_seq_len_to_capture: CM_VLLM_SERVER_MAX_SEQ_LEN_TO_CAPTURE
disable_custom_all_reduce: CM_VLLM_SERVER_DISABLE_CUSTOM_ALL_REDUCE
tokenizer_pool_size: CM_VLLM_SERVER_TOKENIZER_POOL_SIZE
tokenizer_pool_type: CM_VLLM_SERVER_TOKENIZER_POOL_TYPE
tokenizer_pool_extra_config: CM_VLLM_SERVER_TOKENIZER_POOL_EXTRA_CONFIG
enable_lora: CM_VLLM_SERVER_ENABLE_LORA
max_loras: CM_VLLM_SERVER_MAX_LORAS
max_lora_rank: CM_VLLM_SERVER_MAX_LORA_RANK
lora_extra_vocab_size: CM_VLLM_SERVER_LORA_EXTRA_VOCAB_SIZE
lora_dtype: CM_VLLM_SERVER_LORA_DTYPE
long_lora_scaling_factors: CM_VLLM_SERVER_LONG_LORA_SCALING_FACTORS
max_cpu_loras: CM_VLLM_SERVER_MAX_CPU_LORAS
fully_sharded_loras: CM_VLLM_SERVER_FULLY_SHARDED_LORAS
enable_prompt_adapter: CM_VLLM_SERVER_ENABLE_PROMPT_ADAPTER
max_prompt_adapters: CM_VLLM_SERVER_MAX_PROMPT_ADAPTERS
max_prompt_adapter_token: CM_VLLM_SERVER_MAX_PROMPT_ADAPTER_TOKEN
device: CM_VLLM_SERVER_DEVICE
scheduler_delay_factor: CM_VLLM_SERVER_SCHEDULER_DELAY_FACTOR
enable_chunked_prefill: CM_VLLM_SERVER_ENABLE_CHUNKED_PREFILL
speculative_model: CM_VLLM_SERVER_SPECULATIVE_MODEL
num_speculative_tokens: CM_VLLM_SERVER_NUM_SPECULATIVE_TOKENS
speculative_draft_tensor_parallel_size: CM_VLLM_SERVER_SPECULATIVE_DRAFT_TENSOR_PARALLEL_SIZE
speculative_max_model_len: CM_VLLM_SERVER_SPECULATIVE_MAX_MODEL_LEN
speculative_disable_by_batch_size: CM_VLLM_SERVER_SPECULATIVE_DISABLE_BY_BATCH_SIZE
ngram_prompt_lookup_max: CM_VLLM_SERVER_NGRAM_PROMPT_LOOKUP_MAX
ngram_prompt_lookup_min: CM_VLLM_SERVER_NGRAM_PROMPT_LOOKUP_MIN
spec_decoding_acceptance_method: CM_VLLM_SERVER_SPEC_DECODING_ACCEPTANCE_METHOD
typical_acceptance_sampler_posterior_threshold: CM_VLLM_SERVER_TYPICAL_ACCEPTANCE_SAMPLER_POSTERIOR_THRESHOLD
typical_acceptance_sampler_posterior_alpha: CM_VLLM_SERVER_TYPICAL_ACCEPTANCE_SAMPLER_POSTERIOR_ALPHA
model_loader_extra_config: CM_VLLM_SERVER_MODEL_LOADER_EXTRA_CONFIG
preemption_mode: CM_VLLM_SERVER_PREEMPTION_MODE
served_model_name: CM_VLLM_SERVER_SERVED_MODEL_NAME
qlora_adapter_name_or_path: CM_VLLM_SERVER_QLORA_ADAPTER_NAME_OR_PATH
otlp_traces_endpoint: CM_VLLM_SERVER_OTLP_TRACES_ENDPOINT
engine_use_ray: CM_VLLM_SERVER_ENGINE_USE_RAY
disable_log_requests: CM_VLLM_SERVER_DISABLE_LOG_REQUESTS
max_log_len: CM_VLLM_SERVER_MAX_LOG_LEN

deps:
- tags: get,python3,get-python3
version_max: "3.11.999"
version_max_usable: "3.11.0"


- tags: get,cuda,_cudnn
names:
- cuda

- tags: get,ml-model,huggingface,zoo,_clone-repo
update_tags_from_env_with_prefix:
_model-stub.:
- CM_VLLM_SERVER_MODEL_NAME
enable_if_env:
CM_VLLM_SERVER_MODEL_NAME: [ on ]
skip_if_env:
CM_VLLM_SKIP_DOCKER_MODEL_DOWNLOAD: [ on ]

- tags: get,generic-python-lib,_package.vllm

docker:
port_maps:
- "8000:8000"
base_image: nvcr.io/nvidia/pytorch:24.06-py3
interactive: True
extra_run_args: ' --ulimit memlock=-1'
all_gpus: 'yes'
os: "ubuntu"
os_version: "22.04"
Loading

0 comments on commit 92abd99

Please sign in to comment.