From c271fdebdcb2347e139bf5460e423b5df5bf84ea Mon Sep 17 00:00:00 2001 From: anandhu-eng Date: Wed, 17 Jul 2024 17:54:23 +0530 Subject: [PATCH 1/7] changes for docker --- script/run-vllm-server/_cm.yaml | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/script/run-vllm-server/_cm.yaml b/script/run-vllm-server/_cm.yaml index 5a4485e016..d00417d5f8 100644 --- a/script/run-vllm-server/_cm.yaml +++ b/script/run-vllm-server/_cm.yaml @@ -20,16 +20,31 @@ input_mapping: pp_size: CM_VLLM_SERVER_PP_SIZE distributed-executor-backend: CM_VLLM_SERVER_DIST_EXEC_BACKEND api_key: CM_VLLM_SERVER_API_KEY + skip_docker_model_download: CM_VLLM_SKIP_DOCKER_MODEL_DOWNLOAD deps: - tags: get,python3,get-python3 version_max: "3.11.999" version_max_usable: "3.11.0" - + + - tags: get,cuda,_cudnn + names: + - cuda + - tags: get,ml-model,huggingface,zoo,_clone-repo update_tags_from_env_with_prefix: _model-stub.: - CM_VLLM_SERVER_MODEL_NAME enable_if_env: CM_VLLM_SERVER_MODEL_NAME: [ on ] + skip_if_env: + CM_VLLM_SKIP_DOCKER_MODEL_DOWNLOAD: [ on ] + - tags: get,generic-python-lib,_package.vllm + +docker: + port_maps: + - "8000:8000" + base_image: nvcr.io/nvidia/mlperf/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public + interactive: True + extra_run_args: ' --ulimit memlock=-1' From aedd72a502368c25caac2ef024142a66f4cce164 Mon Sep 17 00:00:00 2001 From: anandhu-eng Date: Wed, 17 Jul 2024 18:33:54 +0530 Subject: [PATCH 2/7] mapped host gpus to docker --- script/run-vllm-server/_cm.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/script/run-vllm-server/_cm.yaml b/script/run-vllm-server/_cm.yaml index d00417d5f8..0a87b31059 100644 --- a/script/run-vllm-server/_cm.yaml +++ b/script/run-vllm-server/_cm.yaml @@ -48,3 +48,4 @@ docker: base_image: nvcr.io/nvidia/mlperf/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public interactive: True extra_run_args: ' --ulimit memlock=-1' + all_gpus: 'yes' From 1cb8235774dea386bb86f2e9bffeff5e2d5ec7d2 Mon Sep 17 00:00:00 2001 From: anandhu-eng Date: Wed, 17 Jul 2024 20:51:40 +0530 Subject: [PATCH 3/7] ubuntu+pytorch base image upgrade for vllm --- script/run-vllm-server/_cm.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/script/run-vllm-server/_cm.yaml b/script/run-vllm-server/_cm.yaml index 0a87b31059..b99b18add5 100644 --- a/script/run-vllm-server/_cm.yaml +++ b/script/run-vllm-server/_cm.yaml @@ -45,7 +45,9 @@ deps: docker: port_maps: - "8000:8000" - base_image: nvcr.io/nvidia/mlperf/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public + base_image: nvcr.io/nvidia/pytorch:24.06-py3 interactive: True extra_run_args: ' --ulimit memlock=-1' all_gpus: 'yes' + os: "ubuntu" + os_version: "22.04" From b5082ff6bd14ffed1055471a7cf8a995381277ab Mon Sep 17 00:00:00 2001 From: anandhu-eng Date: Wed, 17 Jul 2024 23:10:34 +0530 Subject: [PATCH 4/7] added all vllm server api arguments --- script/run-vllm-server/_cm.yaml | 90 +++++ script/run-vllm-server/customize.py | 356 ++++++++++++++++++ .../run-vllm-server/dockerfiles/.dockerignore | 1 + ...n8.9-aarch64-ubuntu22.04-public.Dockerfile | 45 +++ ...64-ubuntu22.04-public.Dockerfile.build.bat | 5 + ...h64-ubuntu22.04-public.Dockerfile.build.sh | 5 + ...nn8.9-x86_64-ubuntu20.04-public.Dockerfile | 45 +++ ...64-ubuntu20.04-public.Dockerfile.build.bat | 5 + ..._64-ubuntu20.04-public.Dockerfile.build.sh | 5 + ...6_64-ubuntu20.04-public.Dockerfile.run.bat | 1 + ...86_64-ubuntu20.04-public.Dockerfile.run.sh | 1 + 
.../dockerfiles/pytorch:24.06-py3.Dockerfile | 45 +++ .../pytorch:24.06-py3.Dockerfile.build.bat | 5 + .../pytorch:24.06-py3.Dockerfile.build.sh | 5 + .../pytorch:24.06-py3.Dockerfile.run.bat | 1 + .../pytorch:24.06-py3.Dockerfile.run.sh | 1 + script/run-vllm-server/dockerfiles/tmp-run.sh | 42 +++ .../dockerfiles/tmp-state.json | 3 + .../dockerfiles/ubuntu_22.04.Dockerfile | 45 +++ .../ubuntu_22.04.Dockerfile.build.bat | 5 + .../ubuntu_22.04.Dockerfile.build.sh | 5 + .../ubuntu_22.04.Dockerfile.run.bat | 1 + .../ubuntu_22.04.Dockerfile.run.sh | 1 + script/run-vllm-server/tmp-state.json | 3 + 24 files changed, 721 insertions(+) create mode 100644 script/run-vllm-server/dockerfiles/.dockerignore create mode 100644 script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-aarch64-ubuntu22.04-public.Dockerfile create mode 100644 script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-aarch64-ubuntu22.04-public.Dockerfile.build.bat create mode 100644 script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-aarch64-ubuntu22.04-public.Dockerfile.build.sh create mode 100644 script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile create mode 100644 script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.build.bat create mode 100644 script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.build.sh create mode 100644 script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.run.bat create mode 100644 script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.run.sh create mode 100644 script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile create mode 100644 script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.build.bat create mode 100644 script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.build.sh create mode 100644 script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.run.bat create mode 100644 script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.run.sh create mode 100755 script/run-vllm-server/dockerfiles/tmp-run.sh create mode 100644 script/run-vllm-server/dockerfiles/tmp-state.json create mode 100644 script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile create mode 100644 script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.build.bat create mode 100644 script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.build.sh create mode 100644 script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.run.bat create mode 100644 script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.run.sh create mode 100644 script/run-vllm-server/tmp-state.json diff --git a/script/run-vllm-server/_cm.yaml b/script/run-vllm-server/_cm.yaml index b99b18add5..f75a3d9fec 100644 --- a/script/run-vllm-server/_cm.yaml +++ b/script/run-vllm-server/_cm.yaml @@ -21,6 +21,96 @@ input_mapping: distributed-executor-backend: CM_VLLM_SERVER_DIST_EXEC_BACKEND api_key: CM_VLLM_SERVER_API_KEY skip_docker_model_download: CM_VLLM_SKIP_DOCKER_MODEL_DOWNLOAD + host: CM_VLLM_SERVER_HOST + port: CM_VLLM_SERVER_PORT + uvicorn_log_level: CM_VLLM_SERVER_UVICORN_LOG_LEVEL + allow_credentials: CM_VLLM_SERVER_ALLOW_CREDENTIALS + allowed_origins: CM_VLLM_SERVER_ALLOWED_ORIGINS + allowed_methods: 
CM_VLLM_SERVER_ALLOWED_METHODS + allowed_headers: CM_VLLM_SERVER_ALLOWED_HEADERS + lora_modules: CM_VLLM_SERVER_LORA_MODULES + prompt_adapters: CM_VLLM_SERVER_PROMPT_ADAPTERS + chat_template: CM_VLLM_SERVER_CHAT_TEMPLATE + response_role: CM_VLLM_SERVER_RESPONSE_ROLE + ssl_keyfile: CM_VLLM_SERVER_SSL_KEYFILE + ssl_certfile: CM_VLLM_SERVER_SSL_CERTFILE + ssl_ca_certs: CM_VLLM_SERVER_SSL_CA_CERTS + ssl_cert_reqs: CM_VLLM_SERVER_SSL_CERT_REQS + root_path: CM_VLLM_SERVER_ROOT_PATH + middleware: CM_VLLM_SERVER_MIDDLEWARE + tokenizer: CM_VLLM_SERVER_TOKENIZER + skip_tokenizer_init: CM_VLLM_SERVER_SKIP_TOKENIZER_INIT + revision: CM_VLLM_SERVER_REVISION + code_revision: CM_VLLM_SERVER_CODE_REVISION + tokenizer_revision: CM_VLLM_SERVER_TOKENIZER_REVISION + tokenizer_mode: CM_VLLM_SERVER_TOKENIZER_MODE + trust_remote_code: CM_VLLM_SERVER_TRUST_REMOTE_CODE + download_dir: CM_VLLM_SERVER_DOWNLOAD_DIR + load_format: CM_VLLM_SERVER_LOAD_FORMAT + dtype: CM_VLLM_SERVER_DTYPE + kv_cache_dtype: CM_VLLM_SERVER_KV_CACHE_DTYPE + quantization_param_path: CM_VLLM_SERVER_QUANTIZATION_PARAM_PATH + max_model_len: CM_VLLM_SERVER_MAX_MODEL_LEN + guided_decoding_backend: CM_VLLM_SERVER_GUIDED_DECODING_BACKEND + worker_use_ray: CM_VLLM_SERVER_WORKER_USE_RAY + pipeline_parallel_size: CM_VLLM_SERVER_PIPELINE_PARALLEL_SIZE + max_parallel_loading_workers: CM_VLLM_SERVER_MAX_PARALLEL_LOADING_WORKERS + ray_workers_use_nsight: CM_VLLM_SERVER_RAY_WORKERS_USE_NSIGHT + block_size: CM_VLLM_SERVER_BLOCK_SIZE + enable_prefix_caching: CM_VLLM_SERVER_ENABLE_PREFIX_CACHING + disable_sliding_window: CM_VLLM_SERVER_DISABLE_SLIDING_WINDOW + use_v2_block_manager: CM_VLLM_SERVER_USE_V2_BLOCK_MANAGER + num_lookahead_slots: CM_VLLM_SERVER_NUM_LOOKAHEAD_SLOTS + seed: CM_VLLM_SERVER_SEED + swap_space: CM_VLLM_SERVER_SWAP_SPACE + gpu_memory_utilization: CM_VLLM_SERVER_GPU_MEMORY_UTILIZATION + num_gpu_blocks_override: CM_VLLM_SERVER_NUM_GPU_BLOCKS_OVERRIDE + max_num_batched_tokens: CM_VLLM_SERVER_MAX_NUM_BATCHED_TOKENS + max_num_seqs: CM_VLLM_SERVER_MAX_NUM_SEQS + max_logprobs: CM_VLLM_SERVER_MAX_LOGPROBS + disable_log_stats: CM_VLLM_SERVER_DISABLE_LOG_STATS + quantization: CM_VLLM_SERVER_QUANTIZATION + rope_scaling: CM_VLLM_SERVER_ROPE_SCALING + rope_theta: CM_VLLM_SERVER_ROPE_THETA + enforce_eager: CM_VLLM_SERVER_ENFORCE_EAGER + max_context_len_to_capture: CM_VLLM_SERVER_MAX_CONTEXT_LEN_TO_CAPTURE + max_seq_len_to_capture: CM_VLLM_SERVER_MAX_SEQ_LEN_TO_CAPTURE + disable_custom_all_reduce: CM_VLLM_SERVER_DISABLE_CUSTOM_ALL_REDUCE + tokenizer_pool_size: CM_VLLM_SERVER_TOKENIZER_POOL_SIZE + tokenizer_pool_type: CM_VLLM_SERVER_TOKENIZER_POOL_TYPE + tokenizer_pool_extra_config: CM_VLLM_SERVER_TOKENIZER_POOL_EXTRA_CONFIG + enable_lora: CM_VLLM_SERVER_ENABLE_LORA + max_loras: CM_VLLM_SERVER_MAX_LORAS + max_lora_rank: CM_VLLM_SERVER_MAX_LORA_RANK + lora_extra_vocab_size: CM_VLLM_SERVER_LORA_EXTRA_VOCAB_SIZE + lora_dtype: CM_VLLM_SERVER_LORA_DTYPE + long_lora_scaling_factors: CM_VLLM_SERVER_LONG_LORA_SCALING_FACTORS + max_cpu_loras: CM_VLLM_SERVER_MAX_CPU_LORAS + fully_sharded_loras: CM_VLLM_SERVER_FULLY_SHARDED_LORAS + enable_prompt_adapter: CM_VLLM_SERVER_ENABLE_PROMPT_ADAPTER + max_prompt_adapters: CM_VLLM_SERVER_MAX_PROMPT_ADAPTERS + max_prompt_adapter_token: CM_VLLM_SERVER_MAX_PROMPT_ADAPTER_TOKEN + device: CM_VLLM_SERVER_DEVICE + scheduler_delay_factor: CM_VLLM_SERVER_SCHEDULER_DELAY_FACTOR + enable_chunked_prefill: CM_VLLM_SERVER_ENABLE_CHUNKED_PREFILL + speculative_model: CM_VLLM_SERVER_SPECULATIVE_MODEL + num_speculative_tokens: 
CM_VLLM_SERVER_NUM_SPECULATIVE_TOKENS + speculative_draft_tensor_parallel_size: CM_VLLM_SERVER_SPECULATIVE_DRAFT_TENSOR_PARALLEL_SIZE + speculative_max_model_len: CM_VLLM_SERVER_SPECULATIVE_MAX_MODEL_LEN + speculative_disable_by_batch_size: CM_VLLM_SERVER_SPECULATIVE_DISABLE_BY_BATCH_SIZE + ngram_prompt_lookup_max: CM_VLLM_SERVER_NGRAM_PROMPT_LOOKUP_MAX + ngram_prompt_lookup_min: CM_VLLM_SERVER_NGRAM_PROMPT_LOOKUP_MIN + spec_decoding_acceptance_method: CM_VLLM_SERVER_SPEC_DECODING_ACCEPTANCE_METHOD + typical_acceptance_sampler_posterior_threshold: CM_VLLM_SERVER_TYPICAL_ACCEPTANCE_SAMPLER_POSTERIOR_THRESHOLD + typical_acceptance_sampler_posterior_alpha: CM_VLLM_SERVER_TYPICAL_ACCEPTANCE_SAMPLER_POSTERIOR_ALPHA + model_loader_extra_config: CM_VLLM_SERVER_MODEL_LOADER_EXTRA_CONFIG + preemption_mode: CM_VLLM_SERVER_PREEMPTION_MODE + served_model_name: CM_VLLM_SERVER_SERVED_MODEL_NAME + qlora_adapter_name_or_path: CM_VLLM_SERVER_QLORA_ADAPTER_NAME_OR_PATH + otlp_traces_endpoint: CM_VLLM_SERVER_OTLP_TRACES_ENDPOINT + engine_use_ray: CM_VLLM_SERVER_ENGINE_USE_RAY + disable_log_requests: CM_VLLM_SERVER_DISABLE_LOG_REQUESTS + max_log_len: CM_VLLM_SERVER_MAX_LOG_LEN deps: - tags: get,python3,get-python3 diff --git a/script/run-vllm-server/customize.py b/script/run-vllm-server/customize.py index 65288f42e5..aeffdc2002 100644 --- a/script/run-vllm-server/customize.py +++ b/script/run-vllm-server/customize.py @@ -35,6 +35,362 @@ def preprocess(i): if distributed_executor_backend: cmd_args += f" --distributed-executor-backend {distributed_executor_backend}" + host = env.get("CM_VLLM_SERVER_HOST", False) + if host: + cmd_args += f" --host {host}" + + port = env.get("CM_VLLM_SERVER_PORT", False) + if port: + cmd_args += f" --port {port}" + + uvicorn_log_level = env.get("CM_VLLM_SERVER_UVICORN_LOG_LEVEL", False) + if uvicorn_log_level: + cmd_args += f" --uvicorn-log-level {uvicorn_log_level}" + + allow_credentials = env.get("CM_VLLM_SERVER_ALLOW_CREDENTIALS", False) + if allow_credentials: + cmd_args += f" --allow-credentials" + + allowed_origins = env.get("CM_VLLM_SERVER_ALLOWED_ORIGINS", False) + if allowed_origins: + cmd_args += f" --allowed-origins {allowed_origins}" + + allowed_methods = env.get("CM_VLLM_SERVER_ALLOWED_METHODS", False) + if allowed_methods: + cmd_args += f" --allowed-methods {allowed_methods}" + + allowed_headers = env.get("CM_VLLM_SERVER_ALLOWED_HEADERS", False) + if allowed_headers: + cmd_args += f" --allowed-headers {allowed_headers}" + + lora_modules = env.get("CM_VLLM_SERVER_LORA_MODULES", False) + if lora_modules: + cmd_args += f" --lora-modules {lora_modules}" + + prompt_adapters = env.get("CM_VLLM_SERVER_PROMPT_ADAPTERS", False) + if prompt_adapters: + cmd_args += f" --prompt-adapters {prompt_adapters}" + + chat_template = env.get("CM_VLLM_SERVER_CHAT_TEMPLATE", False) + if chat_template: + cmd_args += f" --chat-template {chat_template}" + + response_role = env.get("CM_VLLM_SERVER_RESPONSE_ROLE", False) + if response_role: + cmd_args += f" --response-role {response_role}" + + ssl_keyfile = env.get("CM_VLLM_SERVER_SSL_KEYFILE", False) + if ssl_keyfile: + cmd_args += f" --ssl-keyfile {ssl_keyfile}" + + ssl_certfile = env.get("CM_VLLM_SERVER_SSL_CERTFILE", False) + if ssl_certfile: + cmd_args += f" --ssl-certfile {ssl_certfile}" + + ssl_ca_certs = env.get("CM_VLLM_SERVER_SSL_CA_CERTS", False) + if ssl_ca_certs: + cmd_args += f" --ssl-ca-certs {ssl_ca_certs}" + + ssl_cert_reqs = env.get("CM_VLLM_SERVER_SSL_CERT_REQS", False) + if ssl_cert_reqs: + cmd_args += f" --ssl-cert-reqs 
{ssl_cert_reqs}" + + root_path = env.get("CM_VLLM_SERVER_ROOT_PATH", False) + if root_path: + cmd_args += f" --root-path {root_path}" + + middleware = env.get("CM_VLLM_SERVER_MIDDLEWARE", False) + if middleware: + cmd_args += f" --middleware {middleware}" + + tokenizer = env.get("CM_VLLM_SERVER_TOKENIZER", False) + if tokenizer: + cmd_args += f" --tokenizer {tokenizer}" + + skip_tokenizer_init = env.get("CM_VLLM_SERVER_SKIP_TOKENIZER_INIT", False) + if skip_tokenizer_init: + cmd_args += f" --skip-tokenizer-init" + + revision = env.get("CM_VLLM_SERVER_REVISION", False) + if revision: + cmd_args += f" --revision {revision}" + + code_revision = env.get("CM_VLLM_SERVER_CODE_REVISION", False) + if code_revision: + cmd_args += f" --code-revision {code_revision}" + + tokenizer_revision = env.get("CM_VLLM_SERVER_TOKENIZER_REVISION", False) + if tokenizer_revision: + cmd_args += f" --tokenizer-revision {tokenizer_revision}" + + tokenizer_mode = env.get("CM_VLLM_SERVER_TOKENIZER_MODE", False) + if tokenizer_mode: + cmd_args += f" --tokenizer-mode {tokenizer_mode}" + + trust_remote_code = env.get("CM_VLLM_SERVER_TRUST_REMOTE_CODE", False) + if trust_remote_code: + cmd_args += f" --trust-remote-code" + + download_dir = env.get("CM_VLLM_SERVER_DOWNLOAD_DIR", False) + if download_dir: + cmd_args += f" --download-dir {download_dir}" + + load_format = env.get("CM_VLLM_SERVER_LOAD_FORMAT", False) + if load_format: + cmd_args += f" --load-format {load_format}" + + dtype = env.get("CM_VLLM_SERVER_DTYPE", False) + if dtype: + cmd_args += f" --dtype {dtype}" + + kv_cache_dtype = env.get("CM_VLLM_SERVER_KV_CACHE_DTYPE", False) + if kv_cache_dtype: + cmd_args += f" --kv-cache-dtype {kv_cache_dtype}" + + quantization_param_path = env.get("CM_VLLM_SERVER_QUANTIZATION_PARAM_PATH", False) + if quantization_param_path: + cmd_args += f" --quantization-param-path {quantization_param_path}" + + max_model_len = env.get("CM_VLLM_SERVER_MAX_MODEL_LEN", False) + if max_model_len: + cmd_args += f" --max-model-len {max_model_len}" + + guided_decoding_backend = env.get("CM_VLLM_SERVER_GUIDED_DECODING_BACKEND", False) + if guided_decoding_backend: + cmd_args += f" --guided-decoding-backend {guided_decoding_backend}" + + worker_use_ray = env.get("CM_VLLM_SERVER_WORKER_USE_RAY", False) + if worker_use_ray: + cmd_args += f" --worker-use-ray" + + max_parallel_loading_workers = env.get("CM_VLLM_SERVER_MAX_PARALLEL_LOADING_WORKERS", False) + if max_parallel_loading_workers: + cmd_args += f" --max-parallel-loading-workers {max_parallel_loading_workers}" + + ray_workers_use_nsight = env.get("CM_VLLM_SERVER_RAY_WORKERS_USE_NSIGHT", False) + if ray_workers_use_nsight: + cmd_args += f" --ray-workers-use-nsight" + + block_size = env.get("CM_VLLM_SERVER_BLOCK_SIZE", False) + if block_size: + cmd_args += f" --block-size {block_size}" + + enable_prefix_caching = env.get("CM_VLLM_SERVER_ENABLE_PREFIX_CACHING", False) + if enable_prefix_caching: + cmd_args += f" --enable-prefix-caching" + + disable_sliding_window = env.get("CM_VLLM_SERVER_DISABLE_SLIDING_WINDOW", False) + if disable_sliding_window: + cmd_args += f" --disable-sliding-window" + + use_v2_block_manager = env.get("CM_VLLM_SERVER_USE_V2_BLOCK_MANAGER", False) + if use_v2_block_manager: + cmd_args += f" --use-v2-block-manager" + + num_lookahead_slots = env.get("CM_VLLM_SERVER_NUM_LOOKAHEAD_SLOTS", False) + if num_lookahead_slots: + cmd_args += f" --num-lookahead-slots {num_lookahead_slots}" + + seed = env.get("CM_VLLM_SERVER_SEED", False) + if seed: + cmd_args += f" --seed {seed}" + + 
swap_space = env.get("CM_VLLM_SERVER_SWAP_SPACE", False) + if swap_space: + cmd_args += f" --swap-space {swap_space}" + + gpu_memory_utilization = env.get("CM_VLLM_SERVER_GPU_MEMORY_UTILIZATION", False) + if gpu_memory_utilization: + cmd_args += f" --gpu-memory-utilization {gpu_memory_utilization}" + + num_gpu_blocks_override = env.get("CM_VLLM_SERVER_NUM_GPU_BLOCKS_OVERRIDE", False) + if num_gpu_blocks_override: + cmd_args += f" --num-gpu-blocks-override {num_gpu_blocks_override}" + + max_num_batched_tokens = env.get("CM_VLLM_SERVER_MAX_NUM_BATCHED_TOKENS", False) + if max_num_batched_tokens: + cmd_args += f" --max-num-batched-tokens {max_num_batched_tokens}" + + max_num_seqs = env.get("CM_VLLM_SERVER_MAX_NUM_SEQS", False) + if max_num_seqs: + cmd_args += f" --max-num-seqs {max_num_seqs}" + + max_logprobs = env.get("CM_VLLM_SERVER_MAX_LOGPROBS", False) + if max_logprobs: + cmd_args += f" --max-logprobs {max_logprobs}" + + disable_log_stats = env.get("CM_VLLM_SERVER_DISABLE_LOG_STATS", False) + if disable_log_stats: + cmd_args += f" --disable-log-stats" + + quantization = env.get("CM_VLLM_SERVER_QUANTIZATION", False) + if quantization: + cmd_args += f" --quantization {quantization}" + + rope_scaling = env.get("CM_VLLM_SERVER_ROPE_SCALING", False) + if rope_scaling: + cmd_args += f" --rope-scaling {rope_scaling}" + + rope_theta = env.get("CM_VLLM_SERVER_ROPE_THETA", False) + if rope_theta: + cmd_args += f" --rope-theta {rope_theta}" + + enforce_eager = env.get("CM_VLLM_SERVER_ENFORCE_EAGER", False) + if enforce_eager: + cmd_args += f" --enforce-eager" + + max_context_len_to_capture = env.get("CM_VLLM_SERVER_MAX_CONTEXT_LEN_TO_CAPTURE", False) + if max_context_len_to_capture: + cmd_args += f" --max-context-len-to-capture {max_context_len_to_capture}" + + max_seq_len_to_capture = env.get("CM_VLLM_SERVER_MAX_SEQ_LEN_TO_CAPTURE", False) + if max_seq_len_to_capture: + cmd_args += f" --max-seq-len-to-capture {max_seq_len_to_capture}" + + disable_custom_all_reduce = env.get("CM_VLLM_SERVER_DISABLE_CUSTOM_ALL_REDUCE", False) + if disable_custom_all_reduce: + cmd_args += f" --disable-custom-all-reduce" + + tokenizer_pool_size = env.get("CM_VLLM_SERVER_TOKENIZER_POOL_SIZE", False) + if tokenizer_pool_size: + cmd_args += f" --tokenizer-pool-size {tokenizer_pool_size}" + + tokenizer_pool_type = env.get("CM_VLLM_SERVER_TOKENIZER_POOL_TYPE", False) + if tokenizer_pool_type: + cmd_args += f" --tokenizer-pool-type {tokenizer_pool_type}" + + tokenizer_pool_extra_config = env.get("CM_VLLM_SERVER_TOKENIZER_POOL_EXTRA_CONFIG", False) + if tokenizer_pool_extra_config: + cmd_args += f" --tokenizer-pool-extra-config {tokenizer_pool_extra_config}" + + enable_lora = env.get("CM_VLLM_SERVER_ENABLE_LORA", False) + if enable_lora: + cmd_args += f" --enable-lora" + + max_loras = env.get("CM_VLLM_SERVER_MAX_LORAS", False) + if max_loras: + cmd_args += f" --max-loras {max_loras}" + + max_lora_rank = env.get("CM_VLLM_SERVER_MAX_LORA_RANK", False) + if max_lora_rank: + cmd_args += f" --max-lora-rank {max_lora_rank}" + + lora_extra_vocab_size = env.get("CM_VLLM_SERVER_LORA_EXTRA_VOCAB_SIZE", False) + if lora_extra_vocab_size: + cmd_args += f" --lora-extra-vocab-size {lora_extra_vocab_size}" + + lora_dtype = env.get("CM_VLLM_SERVER_LORA_DTYPE", False) + if lora_dtype: + cmd_args += f" --lora-dtype {lora_dtype}" + + long_lora_scaling_factors = env.get("CM_VLLM_SERVER_LONG_LORA_SCALING_FACTORS", False) + if long_lora_scaling_factors: + cmd_args += f" --long-lora-scaling-factors {long_lora_scaling_factors}" + + max_cpu_loras = 
env.get("CM_VLLM_SERVER_MAX_CPU_LORAS", False) + if max_cpu_loras: + cmd_args += f" --max-cpu-loras {max_cpu_loras}" + + fully_sharded_loras = env.get("CM_VLLM_SERVER_FULLY_SHARDED_LORAS", False) + if fully_sharded_loras: + cmd_args += f" --fully-sharded-loras" + + enable_prompt_adapter = env.get("CM_VLLM_SERVER_ENABLE_PROMPT_ADAPTER", False) + if enable_prompt_adapter: + cmd_args += f" --enable-prompt-adapter" + + max_prompt_adapters = env.get("CM_VLLM_SERVER_MAX_PROMPT_ADAPTERS", False) + if max_prompt_adapters: + cmd_args += f" --max-prompt-adapters {max_prompt_adapters}" + + max_prompt_adapter_token = env.get("CM_VLLM_SERVER_MAX_PROMPT_ADAPTER_TOKEN", False) + if max_prompt_adapter_token: + cmd_args += f" --max-prompt-adapter-token {max_prompt_adapter_token}" + + device = env.get("CM_VLLM_SERVER_DEVICE", False) + if device: + cmd_args += f" --device {device}" + + scheduler_delay_factor = env.get("CM_VLLM_SERVER_SCHEDULER_DELAY_FACTOR", False) + if scheduler_delay_factor: + cmd_args += f" --scheduler-delay-factor {scheduler_delay_factor}" + + enable_chunked_prefill = env.get("CM_VLLM_SERVER_ENABLE_CHUNKED_PREFILL", False) + if enable_chunked_prefill: + cmd_args += f" --enable-chunked-prefill" + + speculative_model = env.get("CM_VLLM_SERVER_SPECULATIVE_MODEL", False) + if speculative_model: + cmd_args += f" --speculative-model {speculative_model}" + + num_speculative_tokens = env.get("CM_VLLM_SERVER_NUM_SPECULATIVE_TOKENS", False) + if num_speculative_tokens: + cmd_args += f" --num-speculative-tokens {num_speculative_tokens}" + + speculative_draft_tensor_parallel_size = env.get("CM_VLLM_SERVER_SPECULATIVE_DRAFT_TENSOR_PARALLEL_SIZE", False) + if speculative_draft_tensor_parallel_size: + cmd_args += f" --speculative-draft-tensor-parallel-size {speculative_draft_tensor_parallel_size}" + + speculative_max_model_len = env.get("CM_VLLM_SERVER_SPECULATIVE_MAX_MODEL_LEN", False) + if speculative_max_model_len: + cmd_args += f" --speculative-max-model-len {speculative_max_model_len}" + + speculative_disable_by_batch_size = env.get("CM_VLLM_SERVER_SPECULATIVE_DISABLE_BY_BATCH_SIZE", False) + if speculative_disable_by_batch_size: + cmd_args += f" --speculative-disable-by-batch-size {speculative_disable_by_batch_size}" + + ngram_prompt_lookup_max = env.get("CM_VLLM_SERVER_NGRAM_PROMPT_LOOKUP_MAX", False) + if ngram_prompt_lookup_max: + cmd_args += f" --ngram-prompt-lookup-max {ngram_prompt_lookup_max}" + + ngram_prompt_lookup_min = env.get("CM_VLLM_SERVER_NGRAM_PROMPT_LOOKUP_MIN", False) + if ngram_prompt_lookup_min: + cmd_args += f" --ngram-prompt-lookup-min {ngram_prompt_lookup_min}" + + spec_decoding_acceptance_method = env.get("CM_VLLM_SERVER_SPEC_DECODING_ACCEPTANCE_METHOD", False) + if spec_decoding_acceptance_method: + cmd_args += f" --spec-decoding-acceptance-method {spec_decoding_acceptance_method}" + + typical_acceptance_sampler_posterior_threshold = env.get("CM_VLLM_SERVER_TYPICAL_ACCEPTANCE_SAMPLER_POSTERIOR_THRESHOLD", False) + if typical_acceptance_sampler_posterior_threshold: + cmd_args += f" --typical-acceptance-sampler-posterior-threshold {typical_acceptance_sampler_posterior_threshold}" + + typical_acceptance_sampler_posterior_alpha = env.get("CM_VLLM_SERVER_TYPICAL_ACCEPTANCE_SAMPLER_POSTERIOR_ALPHA", False) + if typical_acceptance_sampler_posterior_alpha: + cmd_args += f" --typical-acceptance-sampler-posterior-alpha {typical_acceptance_sampler_posterior_alpha}" + + model_loader_extra_config = env.get("CM_VLLM_SERVER_MODEL_LOADER_EXTRA_CONFIG", False) + if 
model_loader_extra_config: + cmd_args += f" --model-loader-extra-config {model_loader_extra_config}" + + preemption_mode = env.get("CM_VLLM_SERVER_PREEMPTION_MODE", False) + if preemption_mode: + cmd_args += f" --preemption-mode {preemption_mode}" + + served_model_name = env.get("CM_VLLM_SERVER_SERVED_MODEL_NAME", False) + if served_model_name: + cmd_args += f" --served-model-name {served_model_name}" + + qlora_adapter_name_or_path = env.get("CM_VLLM_SERVER_QLORA_ADAPTER_NAME_OR_PATH", False) + if qlora_adapter_name_or_path: + cmd_args += f" --qlora-adapter-name-or-path {qlora_adapter_name_or_path}" + + otlp_traces_endpoint = env.get("CM_VLLM_SERVER_OTLP_TRACES_ENDPOINT", False) + if otlp_traces_endpoint: + cmd_args += f" --otlp-traces-endpoint {otlp_traces_endpoint}" + + engine_use_ray = env.get("CM_VLLM_SERVER_ENGINE_USE_RAY", False) + if engine_use_ray: + cmd_args += f" --engine-use-ray" + + disable_log_requests = env.get("CM_VLLM_SERVER_DISABLE_LOG_REQUESTS", False) + if disable_log_requests: + cmd_args += f" --disable-log-requests" + + max_log_len = env.get("CM_VLLM_SERVER_MAX_LOG_LEN", False) + if max_log_len: + cmd_args += f" --max-log-len {max_log_len}" + cmd = f"{env['CM_PYTHON_BIN_WITH_PATH']} -m vllm.entrypoints.openai.api_server {cmd_args}" print(cmd) diff --git a/script/run-vllm-server/dockerfiles/.dockerignore b/script/run-vllm-server/dockerfiles/.dockerignore new file mode 100644 index 0000000000..6b8710a711 --- /dev/null +++ b/script/run-vllm-server/dockerfiles/.dockerignore @@ -0,0 +1 @@ +.git diff --git a/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-aarch64-ubuntu22.04-public.Dockerfile b/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-aarch64-ubuntu22.04-public.Dockerfile new file mode 100644 index 0000000000..61cb8bb4c9 --- /dev/null +++ b/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-aarch64-ubuntu22.04-public.Dockerfile @@ -0,0 +1,45 @@ +FROM nvcr.io/nvidia/mlperf/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-aarch64-ubuntu22.04-public + +# Automatically generated by the CM workflow automation meta-framework +# https://github.com/mlcommons/ck + +LABEL github="" +LABEL maintainer="" +LABEL license="" + +SHELL ["/bin/bash", "-c"] + +ARG UID=1000 +ARG GID=1000 +ARG CM_GH_TOKEN + + +# Notes: https://runnable.com/blog/9-common-dockerfile-mistakes +# Install system dependencies +RUN apt-get update -y +RUN apt-get install -y python3 python3-pip git sudo wget python3-venv + +# Setup docker environment +ENTRYPOINT ["/bin/bash", "-c"] +ENV TZ="US/Pacific" +ENV PATH="${PATH}:/home/cmuser/.local/bin" +RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ >/etc/timezone + +# Setup docker user +RUN groupadd -g $GID -o cm +RUN useradd -m -u $UID -g $GID -o --create-home --shell /bin/bash cmuser +RUN echo "cmuser ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers +USER cmuser:cm +WORKDIR /home/cmuser + +# Install python packages +RUN python3 -m pip install --user cmind requests giturlparse tabulate + +# Download CM repo for scripts +RUN cm pull repo anandhu-eng@cm4mlops + +# Install all system dependencies +RUN cm run script --tags=get,sys-utils-cm --quiet + +# Run commands +RUN cm run script --tags=run,vllm-server --model=NousResearch/Hermes-2-Theta-Llama-3-8B --api_key= --adr.cuda.version=12.4.1 --quiet --fake_run --env.CM_RUN_STATE_DOCKER=True diff --git 
a/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-aarch64-ubuntu22.04-public.Dockerfile.build.bat b/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-aarch64-ubuntu22.04-public.Dockerfile.build.bat new file mode 100644 index 0000000000..998a0d9230 --- /dev/null +++ b/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-aarch64-ubuntu22.04-public.Dockerfile.build.bat @@ -0,0 +1,5 @@ +docker build --no-cache ^ + --build-arg GID=\" $(id -g $USER) \" --build-arg UID=\" $(id -u $USER) \" ^ + -f "/home/anandhu/CM/repos/anandhu-eng@cm4mlops/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-aarch64-ubuntu22.04-public.Dockerfile" ^ + -t "cknowledge/cm-script-run-vllm-server:ubuntu-22.04-latest" ^ + . diff --git a/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-aarch64-ubuntu22.04-public.Dockerfile.build.sh b/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-aarch64-ubuntu22.04-public.Dockerfile.build.sh new file mode 100644 index 0000000000..4420c7341b --- /dev/null +++ b/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-aarch64-ubuntu22.04-public.Dockerfile.build.sh @@ -0,0 +1,5 @@ +docker build --no-cache \ + --build-arg GID=\" $(id -g $USER) \" --build-arg UID=\" $(id -u $USER) \" \ + -f "/home/anandhu/CM/repos/anandhu-eng@cm4mlops/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-aarch64-ubuntu22.04-public.Dockerfile" \ + -t "cknowledge/cm-script-run-vllm-server:ubuntu-22.04-latest" \ + . diff --git a/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile b/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile new file mode 100644 index 0000000000..cbf91bdaea --- /dev/null +++ b/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile @@ -0,0 +1,45 @@ +FROM nvcr.io/nvidia/mlperf/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public + +# Automatically generated by the CM workflow automation meta-framework +# https://github.com/mlcommons/ck + +LABEL github="" +LABEL maintainer="" +LABEL license="" + +SHELL ["/bin/bash", "-c"] + +ARG UID=1000 +ARG GID=1000 +ARG CM_GH_TOKEN + + +# Notes: https://runnable.com/blog/9-common-dockerfile-mistakes +# Install system dependencies +RUN apt-get update -y +RUN apt-get install -y python3 python3-pip git sudo wget python3-venv + +# Setup docker environment +ENTRYPOINT ["/bin/bash", "-c"] +ENV TZ="US/Pacific" +ENV PATH="${PATH}:/home/cmuser/.local/bin" +RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ >/etc/timezone + +# Setup docker user +RUN groupadd -g $GID -o cm +RUN useradd -m -u $UID -g $GID -o --create-home --shell /bin/bash cmuser +RUN echo "cmuser ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers +USER cmuser:cm +WORKDIR /home/cmuser + +# Install python packages +RUN python3 -m pip install --user cmind requests giturlparse tabulate + +# Download CM repo for scripts +RUN cm pull repo anandhu-eng@cm4mlops + +# Install all system dependencies +RUN cm run script --tags=get,sys-utils-cm --quiet + +# Run commands +RUN cm run script --tags=run,vllm-server --model=NousResearch/Hermes-2-Theta-Llama-3-8B --api_key= --adr.cuda.version=12.4.1 --quiet --fake_run --env.CM_RUN_STATE_DOCKER=True diff --git 
a/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.build.bat b/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.build.bat new file mode 100644 index 0000000000..4b28e3c6c7 --- /dev/null +++ b/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.build.bat @@ -0,0 +1,5 @@ +docker build ^ + --build-arg GID=\" $(id -g $USER) \" --build-arg UID=\" $(id -u $USER) \" ^ + -f "/home/anandhu/CM/repos/anandhu-eng@cm4mlops/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile" ^ + -t "cknowledge/cm-script-run-vllm-server:ubuntu-22.04-latest" ^ + . diff --git a/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.build.sh b/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.build.sh new file mode 100644 index 0000000000..17521deb81 --- /dev/null +++ b/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.build.sh @@ -0,0 +1,5 @@ +docker build \ + --build-arg GID=\" $(id -g $USER) \" --build-arg UID=\" $(id -u $USER) \" \ + -f "/home/anandhu/CM/repos/anandhu-eng@cm4mlops/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile" \ + -t "cknowledge/cm-script-run-vllm-server:ubuntu-22.04-latest" \ + . diff --git a/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.run.bat b/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.run.bat new file mode 100644 index 0000000000..e09e482494 --- /dev/null +++ b/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.run.bat @@ -0,0 +1 @@ +docker run -it --entrypoint '' --gpus=all --ulimit memlock=-1 -p 8000:8000 cknowledge/cm-script-run-vllm-server:ubuntu-22.04-latest bash -c '(cm run script --tags=run,vllm-server --model=NousResearch/Hermes-2-Theta-Llama-3-8B --api_key= --adr.cuda.version=12.4.1 && bash ) || bash' diff --git a/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.run.sh b/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.run.sh new file mode 100644 index 0000000000..e09e482494 --- /dev/null +++ b/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.run.sh @@ -0,0 +1 @@ +docker run -it --entrypoint '' --gpus=all --ulimit memlock=-1 -p 8000:8000 cknowledge/cm-script-run-vllm-server:ubuntu-22.04-latest bash -c '(cm run script --tags=run,vllm-server --model=NousResearch/Hermes-2-Theta-Llama-3-8B --api_key= --adr.cuda.version=12.4.1 && bash ) || bash' diff --git a/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile b/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile new file mode 100644 index 0000000000..2db21f4ae4 --- /dev/null +++ b/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile @@ -0,0 +1,45 @@ +FROM nvcr.io/nvidia/pytorch:24.06-py3 + +# Automatically generated by the CM workflow automation 
meta-framework +# https://github.com/mlcommons/ck + +LABEL github="" +LABEL maintainer="" +LABEL license="" + +SHELL ["/bin/bash", "-c"] + +ARG UID=1000 +ARG GID=1000 +ARG CM_GH_TOKEN + + +# Notes: https://runnable.com/blog/9-common-dockerfile-mistakes +# Install system dependencies +RUN apt-get update -y +RUN apt-get install -y python3 python3-pip git sudo wget python3-venv + +# Setup docker environment +ENTRYPOINT ["/bin/bash", "-c"] +ENV TZ="US/Pacific" +ENV PATH="${PATH}:/home/cmuser/.local/bin" +RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ >/etc/timezone + +# Setup docker user +RUN groupadd -g $GID -o cm +RUN useradd -m -u $UID -g $GID -o --create-home --shell /bin/bash cmuser +RUN echo "cmuser ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers +USER cmuser:cm +WORKDIR /home/cmuser + +# Install python packages +RUN python3 -m pip install --user cmind requests giturlparse tabulate + +# Download CM repo for scripts +RUN cm pull repo anandhu-eng@cm4mlops + +# Install all system dependencies +RUN cm run script --tags=get,sys-utils-cm --quiet + +# Run commands +RUN cm run script --tags=run,vllm-server --model=NousResearch/Hermes-2-Theta-Llama-3-8B --api_key= --adr.cuda.version=12.4.1 --skip_docker_model_download=True --quiet --fake_run --env.CM_RUN_STATE_DOCKER=True diff --git a/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.build.bat b/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.build.bat new file mode 100644 index 0000000000..b13a780050 --- /dev/null +++ b/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.build.bat @@ -0,0 +1,5 @@ +docker build --no-cache ^ + --build-arg GID=\" $(id -g $USER) \" --build-arg UID=\" $(id -u $USER) \" ^ + -f "/home/anandhu/CM/repos/anandhu-eng@cm4mlops/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile" ^ + -t "cknowledge/cm-script-run-vllm-server:ubuntu-22.04-latest" ^ + . diff --git a/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.build.sh b/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.build.sh new file mode 100644 index 0000000000..71754a9cf1 --- /dev/null +++ b/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.build.sh @@ -0,0 +1,5 @@ +docker build --no-cache \ + --build-arg GID=\" $(id -g $USER) \" --build-arg UID=\" $(id -u $USER) \" \ + -f "/home/anandhu/CM/repos/anandhu-eng@cm4mlops/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile" \ + -t "cknowledge/cm-script-run-vllm-server:ubuntu-22.04-latest" \ + . 
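Every server option added to customize.py in PATCH 4 follows the same pattern: read a CM_VLLM_SERVER_* variable from the env dictionary and, if it is set, append the matching vLLM CLI flag (value options get the value, boolean switches are appended bare). Because the check is plain truthiness, any non-empty string (even "no") turns a switch such as --enforce-eager on. The Python sketch below is only a condensed illustration of that pattern, with a hypothetical helper and a small subset of flags; it is not the committed implementation, which spells out each option as an explicit if-block.

value_flags = {
    "CM_VLLM_SERVER_HOST": "--host",
    "CM_VLLM_SERVER_PORT": "--port",
    "CM_VLLM_SERVER_GPU_MEMORY_UTILIZATION": "--gpu-memory-utilization",
    "CM_VLLM_SERVER_MAX_MODEL_LEN": "--max-model-len",
}
switch_flags = {
    "CM_VLLM_SERVER_ENFORCE_EAGER": "--enforce-eager",
    "CM_VLLM_SERVER_TRUST_REMOTE_CODE": "--trust-remote-code",
}

def build_extra_args(env):
    # Mirror of the repeated pattern in customize.py: env.get(var, False)
    # followed by a truthiness check before appending the flag.
    cmd_args = ""
    for var, flag in value_flags.items():
        value = env.get(var, False)
        if value:
            cmd_args += f" {flag} {value}"
    for var, flag in switch_flags.items():
        if env.get(var, False):
            cmd_args += f" {flag}"
    return cmd_args

# build_extra_args({"CM_VLLM_SERVER_PORT": "8000", "CM_VLLM_SERVER_ENFORCE_EAGER": "yes"})
# returns " --port 8000 --enforce-eager"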
diff --git a/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.run.bat b/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.run.bat new file mode 100644 index 0000000000..24e904e9f1 --- /dev/null +++ b/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.run.bat @@ -0,0 +1 @@ +docker run -it --entrypoint '' --gpus=all --ulimit memlock=-1 -p 8000:8000 cknowledge/cm-script-run-vllm-server:ubuntu-22.04-latest bash -c '(cm run script --tags=run,vllm-server --model=NousResearch/Hermes-2-Theta-Llama-3-8B --api_key= --adr.cuda.version=12.4.1 --skip_docker_model_download=True && bash ) || bash' diff --git a/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.run.sh b/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.run.sh new file mode 100644 index 0000000000..24e904e9f1 --- /dev/null +++ b/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.run.sh @@ -0,0 +1 @@ +docker run -it --entrypoint '' --gpus=all --ulimit memlock=-1 -p 8000:8000 cknowledge/cm-script-run-vllm-server:ubuntu-22.04-latest bash -c '(cm run script --tags=run,vllm-server --model=NousResearch/Hermes-2-Theta-Llama-3-8B --api_key= --adr.cuda.version=12.4.1 --skip_docker_model_download=True && bash ) || bash' diff --git a/script/run-vllm-server/dockerfiles/tmp-run.sh b/script/run-vllm-server/dockerfiles/tmp-run.sh new file mode 100755 index 0000000000..65c5270f0e --- /dev/null +++ b/script/run-vllm-server/dockerfiles/tmp-run.sh @@ -0,0 +1,42 @@ +#!/bin/bash + +export CM_DOCKER_BUILD_ARGS="GID=\" $(id -g $USER) \" UID=\" $(id -u $USER) \" ${CM_DOCKER_BUILD_ARGS}" +export CM_BUILD_DOCKERFILE="no" +export CM_DOCKERFILE_WITH_PATH="/home/anandhu/CM/repos/anandhu-eng@cm4mlops/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile" +export CM_DOCKER_ADD_ALL_GPUS="True" +export CM_DOCKER_BUILD_ARGS="--build-arg GID=\" $(id -g $USER) \" --build-arg UID=\" $(id -u $USER) \"" +export CM_DOCKER_BUILD_CMD="docker build --no-cache --build-arg GID=\" $(id -g $USER) \" --build-arg UID=\" $(id -u $USER) \" -f "/home/anandhu/CM/repos/anandhu-eng@cm4mlops/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile" -t "cknowledge/cm-script-run-vllm-server:ubuntu-22.04-latest" ." 
+export CM_DOCKER_CACHE="no" +export CM_DOCKER_CACHE_ARG=" --no-cache" +export CM_DOCKER_DETACHED_MODE="no" +export CM_DOCKER_EXTRA_RUN_ARGS=" --ulimit memlock=-1" +export CM_DOCKER_IMAGE_BASE="ubuntu:22.04" +export CM_DOCKER_IMAGE_NAME="cm-script-run-vllm-server" +export CM_DOCKER_IMAGE_RECREATE="yes" +export CM_DOCKER_IMAGE_REPO="cknowledge" +export CM_DOCKER_IMAGE_TAG="ubuntu-22.04-latest" +export CM_DOCKER_IMAGE_TAG_EXTRA="-latest" +export CM_DOCKER_INTERACTIVE_MODE="True" +export CM_DOCKER_OS="ubuntu" +export CM_DOCKER_OS_VERSION="22.04" +export CM_DOCKER_PORT_MAPS="['8000:8000']" +export CM_DOCKER_PRE_RUN_COMMANDS="[]" +export CM_DOCKER_RUN_CMD="cm run script --tags=run,vllm-server --model=NousResearch/Hermes-2-Theta-Llama-3-8B --api_key= --adr.cuda.version=12.4.1 --skip_docker_model_download=True --docker_run_deps " +export CM_DOCKER_RUN_SCRIPT_TAGS="run,server,vllm,vllm-server" +export CM_DOCKER_VOLUME_MOUNTS="[]" +export CM_MLOPS_REPO="anandhu-eng@cm4mlops" +export CM_QUIET="yes" +export CM_REAL_RUN="True" +export CM_RUN_STATE_DOCKER="True" +export CM_TMP_CURRENT_PATH="/home/anandhu/CM/repos/anandhu-eng@cm4mlops/script/run-vllm-server" +export CM_TMP_CURRENT_SCRIPT_PATH="/home/anandhu/CM/repos/anandhu-eng@cm4mlops/script/build-docker-image" +export CM_TMP_CURRENT_SCRIPT_REPO_PATH="/home/anandhu/CM/repos/anandhu-eng@cm4mlops" +export CM_TMP_CURRENT_SCRIPT_REPO_PATH_WITH_PREFIX="/home/anandhu/CM/repos/anandhu-eng@cm4mlops" +export CM_TMP_CURRENT_SCRIPT_WORK_PATH="/home/anandhu/CM/repos/anandhu-eng@cm4mlops/script/run-vllm-server/dockerfiles" +export CM_TMP_PIP_VERSION_STRING="" +export CM_VLLM_SERVER_API_KEY="" +export CM_VLLM_SERVER_MODEL_NAME="NousResearch/Hermes-2-Theta-Llama-3-8B" +export CM_VLLM_SKIP_DOCKER_MODEL_DOWNLOAD="True" + + +. "/home/anandhu/CM/repos/anandhu-eng@cm4mlops/script/build-docker-image/run.sh" diff --git a/script/run-vllm-server/dockerfiles/tmp-state.json b/script/run-vllm-server/dockerfiles/tmp-state.json new file mode 100644 index 0000000000..e03109d44a --- /dev/null +++ b/script/run-vllm-server/dockerfiles/tmp-state.json @@ -0,0 +1,3 @@ +{ + "docker": {} +} diff --git a/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile b/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile new file mode 100644 index 0000000000..29c6ef0775 --- /dev/null +++ b/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile @@ -0,0 +1,45 @@ +FROM ubuntu:22.04 + +# Automatically generated by the CM workflow automation meta-framework +# https://github.com/mlcommons/ck + +LABEL github="" +LABEL maintainer="" +LABEL license="" + +SHELL ["/bin/bash", "-c"] + +ARG UID=1000 +ARG GID=1000 +ARG CM_GH_TOKEN + + +# Notes: https://runnable.com/blog/9-common-dockerfile-mistakes +# Install system dependencies +RUN apt-get update -y +RUN apt-get install -y python3 python3-pip git sudo wget python3-venv + +# Setup docker environment +ENTRYPOINT ["/bin/bash", "-c"] +ENV TZ="US/Pacific" +ENV PATH="${PATH}:/home/cmuser/.local/bin" +RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ >/etc/timezone + +# Setup docker user +RUN groupadd -g $GID -o cm +RUN useradd -m -u $UID -g $GID -o --create-home --shell /bin/bash cmuser +RUN echo "cmuser ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers +USER cmuser:cm +WORKDIR /home/cmuser + +# Install python packages +RUN python3 -m pip install --user cmind requests giturlparse tabulate + +# Download CM repo for scripts +RUN cm pull repo anandhu-eng@cm4mlops + +# Install all system dependencies +RUN cm run script --tags=get,sys-utils-cm --quiet + 
+# Run commands +RUN cm run script --tags=run,vllm-server --model=NousResearch/Hermes-2-Theta-Llama-3-8B --api_key= --quiet --fake_run --env.CM_RUN_STATE_DOCKER=True diff --git a/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.build.bat b/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.build.bat new file mode 100644 index 0000000000..84acbf7833 --- /dev/null +++ b/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.build.bat @@ -0,0 +1,5 @@ +docker build --no-cache ^ + --build-arg GID=\" $(id -g $USER) \" --build-arg UID=\" $(id -u $USER) \" ^ + -f "/home/anandhu/CM/repos/anandhu-eng@cm4mlops/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile" ^ + -t "cknowledge/cm-script-run-vllm-server:ubuntu-22.04-latest" ^ + . diff --git a/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.build.sh b/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.build.sh new file mode 100644 index 0000000000..29217c2769 --- /dev/null +++ b/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.build.sh @@ -0,0 +1,5 @@ +docker build --no-cache \ + --build-arg GID=\" $(id -g $USER) \" --build-arg UID=\" $(id -u $USER) \" \ + -f "/home/anandhu/CM/repos/anandhu-eng@cm4mlops/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile" \ + -t "cknowledge/cm-script-run-vllm-server:ubuntu-22.04-latest" \ + . diff --git a/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.run.bat b/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.run.bat new file mode 100644 index 0000000000..4ef7c0806b --- /dev/null +++ b/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.run.bat @@ -0,0 +1 @@ +docker run --entrypoint '' -p 8000:8000 cknowledge/cm-script-run-vllm-server:ubuntu-22.04-latest bash -c 'cm run script --tags=run,vllm-server --model=NousResearch/Hermes-2-Theta-Llama-3-8B --api_key= ' diff --git a/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.run.sh b/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.run.sh new file mode 100644 index 0000000000..4ef7c0806b --- /dev/null +++ b/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.run.sh @@ -0,0 +1 @@ +docker run --entrypoint '' -p 8000:8000 cknowledge/cm-script-run-vllm-server:ubuntu-22.04-latest bash -c 'cm run script --tags=run,vllm-server --model=NousResearch/Hermes-2-Theta-Llama-3-8B --api_key= ' diff --git a/script/run-vllm-server/tmp-state.json b/script/run-vllm-server/tmp-state.json new file mode 100644 index 0000000000..e03109d44a --- /dev/null +++ b/script/run-vllm-server/tmp-state.json @@ -0,0 +1,3 @@ +{ + "docker": {} +} From 78c1452a716c83551fe963934039596b909e8bf5 Mon Sep 17 00:00:00 2001 From: anandhu-eng Date: Wed, 17 Jul 2024 23:12:52 +0530 Subject: [PATCH 5/7] clean temp files --- .../run-vllm-server/dockerfiles/.dockerignore | 1 - ...n8.9-aarch64-ubuntu22.04-public.Dockerfile | 45 ------------------- ...64-ubuntu22.04-public.Dockerfile.build.bat | 5 --- ...h64-ubuntu22.04-public.Dockerfile.build.sh | 5 --- ...nn8.9-x86_64-ubuntu20.04-public.Dockerfile | 45 ------------------- ...64-ubuntu20.04-public.Dockerfile.build.bat | 5 --- ..._64-ubuntu20.04-public.Dockerfile.build.sh | 5 --- ...6_64-ubuntu20.04-public.Dockerfile.run.bat | 1 - ...86_64-ubuntu20.04-public.Dockerfile.run.sh | 1 - .../dockerfiles/pytorch:24.06-py3.Dockerfile | 45 ------------------- .../pytorch:24.06-py3.Dockerfile.build.bat | 5 --- .../pytorch:24.06-py3.Dockerfile.build.sh | 5 --- .../pytorch:24.06-py3.Dockerfile.run.bat | 1 - .../pytorch:24.06-py3.Dockerfile.run.sh | 1 - 
script/run-vllm-server/dockerfiles/tmp-run.sh | 42 ----------------- .../dockerfiles/tmp-state.json | 3 -- .../dockerfiles/ubuntu_22.04.Dockerfile | 45 ------------------- .../ubuntu_22.04.Dockerfile.build.bat | 5 --- .../ubuntu_22.04.Dockerfile.build.sh | 5 --- .../ubuntu_22.04.Dockerfile.run.bat | 1 - .../ubuntu_22.04.Dockerfile.run.sh | 1 - 21 files changed, 272 deletions(-) delete mode 100644 script/run-vllm-server/dockerfiles/.dockerignore delete mode 100644 script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-aarch64-ubuntu22.04-public.Dockerfile delete mode 100644 script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-aarch64-ubuntu22.04-public.Dockerfile.build.bat delete mode 100644 script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-aarch64-ubuntu22.04-public.Dockerfile.build.sh delete mode 100644 script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile delete mode 100644 script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.build.bat delete mode 100644 script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.build.sh delete mode 100644 script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.run.bat delete mode 100644 script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.run.sh delete mode 100644 script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile delete mode 100644 script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.build.bat delete mode 100644 script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.build.sh delete mode 100644 script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.run.bat delete mode 100644 script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.run.sh delete mode 100755 script/run-vllm-server/dockerfiles/tmp-run.sh delete mode 100644 script/run-vllm-server/dockerfiles/tmp-state.json delete mode 100644 script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile delete mode 100644 script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.build.bat delete mode 100644 script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.build.sh delete mode 100644 script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.run.bat delete mode 100644 script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.run.sh diff --git a/script/run-vllm-server/dockerfiles/.dockerignore b/script/run-vllm-server/dockerfiles/.dockerignore deleted file mode 100644 index 6b8710a711..0000000000 --- a/script/run-vllm-server/dockerfiles/.dockerignore +++ /dev/null @@ -1 +0,0 @@ -.git diff --git a/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-aarch64-ubuntu22.04-public.Dockerfile b/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-aarch64-ubuntu22.04-public.Dockerfile deleted file mode 100644 index 61cb8bb4c9..0000000000 --- a/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-aarch64-ubuntu22.04-public.Dockerfile +++ /dev/null @@ -1,45 +0,0 @@ -FROM nvcr.io/nvidia/mlperf/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-aarch64-ubuntu22.04-public - -# Automatically generated by the CM workflow automation meta-framework -# https://github.com/mlcommons/ck - 
-LABEL github="" -LABEL maintainer="" -LABEL license="" - -SHELL ["/bin/bash", "-c"] - -ARG UID=1000 -ARG GID=1000 -ARG CM_GH_TOKEN - - -# Notes: https://runnable.com/blog/9-common-dockerfile-mistakes -# Install system dependencies -RUN apt-get update -y -RUN apt-get install -y python3 python3-pip git sudo wget python3-venv - -# Setup docker environment -ENTRYPOINT ["/bin/bash", "-c"] -ENV TZ="US/Pacific" -ENV PATH="${PATH}:/home/cmuser/.local/bin" -RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ >/etc/timezone - -# Setup docker user -RUN groupadd -g $GID -o cm -RUN useradd -m -u $UID -g $GID -o --create-home --shell /bin/bash cmuser -RUN echo "cmuser ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers -USER cmuser:cm -WORKDIR /home/cmuser - -# Install python packages -RUN python3 -m pip install --user cmind requests giturlparse tabulate - -# Download CM repo for scripts -RUN cm pull repo anandhu-eng@cm4mlops - -# Install all system dependencies -RUN cm run script --tags=get,sys-utils-cm --quiet - -# Run commands -RUN cm run script --tags=run,vllm-server --model=NousResearch/Hermes-2-Theta-Llama-3-8B --api_key= --adr.cuda.version=12.4.1 --quiet --fake_run --env.CM_RUN_STATE_DOCKER=True diff --git a/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-aarch64-ubuntu22.04-public.Dockerfile.build.bat b/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-aarch64-ubuntu22.04-public.Dockerfile.build.bat deleted file mode 100644 index 998a0d9230..0000000000 --- a/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-aarch64-ubuntu22.04-public.Dockerfile.build.bat +++ /dev/null @@ -1,5 +0,0 @@ -docker build --no-cache ^ - --build-arg GID=\" $(id -g $USER) \" --build-arg UID=\" $(id -u $USER) \" ^ - -f "/home/anandhu/CM/repos/anandhu-eng@cm4mlops/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-aarch64-ubuntu22.04-public.Dockerfile" ^ - -t "cknowledge/cm-script-run-vllm-server:ubuntu-22.04-latest" ^ - . diff --git a/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-aarch64-ubuntu22.04-public.Dockerfile.build.sh b/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-aarch64-ubuntu22.04-public.Dockerfile.build.sh deleted file mode 100644 index 4420c7341b..0000000000 --- a/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-aarch64-ubuntu22.04-public.Dockerfile.build.sh +++ /dev/null @@ -1,5 +0,0 @@ -docker build --no-cache \ - --build-arg GID=\" $(id -g $USER) \" --build-arg UID=\" $(id -u $USER) \" \ - -f "/home/anandhu/CM/repos/anandhu-eng@cm4mlops/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-aarch64-ubuntu22.04-public.Dockerfile" \ - -t "cknowledge/cm-script-run-vllm-server:ubuntu-22.04-latest" \ - . 
diff --git a/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile b/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile deleted file mode 100644 index cbf91bdaea..0000000000 --- a/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile +++ /dev/null @@ -1,45 +0,0 @@ -FROM nvcr.io/nvidia/mlperf/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public - -# Automatically generated by the CM workflow automation meta-framework -# https://github.com/mlcommons/ck - -LABEL github="" -LABEL maintainer="" -LABEL license="" - -SHELL ["/bin/bash", "-c"] - -ARG UID=1000 -ARG GID=1000 -ARG CM_GH_TOKEN - - -# Notes: https://runnable.com/blog/9-common-dockerfile-mistakes -# Install system dependencies -RUN apt-get update -y -RUN apt-get install -y python3 python3-pip git sudo wget python3-venv - -# Setup docker environment -ENTRYPOINT ["/bin/bash", "-c"] -ENV TZ="US/Pacific" -ENV PATH="${PATH}:/home/cmuser/.local/bin" -RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ >/etc/timezone - -# Setup docker user -RUN groupadd -g $GID -o cm -RUN useradd -m -u $UID -g $GID -o --create-home --shell /bin/bash cmuser -RUN echo "cmuser ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers -USER cmuser:cm -WORKDIR /home/cmuser - -# Install python packages -RUN python3 -m pip install --user cmind requests giturlparse tabulate - -# Download CM repo for scripts -RUN cm pull repo anandhu-eng@cm4mlops - -# Install all system dependencies -RUN cm run script --tags=get,sys-utils-cm --quiet - -# Run commands -RUN cm run script --tags=run,vllm-server --model=NousResearch/Hermes-2-Theta-Llama-3-8B --api_key= --adr.cuda.version=12.4.1 --quiet --fake_run --env.CM_RUN_STATE_DOCKER=True diff --git a/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.build.bat b/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.build.bat deleted file mode 100644 index 4b28e3c6c7..0000000000 --- a/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.build.bat +++ /dev/null @@ -1,5 +0,0 @@ -docker build ^ - --build-arg GID=\" $(id -g $USER) \" --build-arg UID=\" $(id -u $USER) \" ^ - -f "/home/anandhu/CM/repos/anandhu-eng@cm4mlops/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile" ^ - -t "cknowledge/cm-script-run-vllm-server:ubuntu-22.04-latest" ^ - . diff --git a/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.build.sh b/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.build.sh deleted file mode 100644 index 17521deb81..0000000000 --- a/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.build.sh +++ /dev/null @@ -1,5 +0,0 @@ -docker build \ - --build-arg GID=\" $(id -g $USER) \" --build-arg UID=\" $(id -u $USER) \" \ - -f "/home/anandhu/CM/repos/anandhu-eng@cm4mlops/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile" \ - -t "cknowledge/cm-script-run-vllm-server:ubuntu-22.04-latest" \ - . 
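The generated run commands in this series publish the server on host port 8000 (the "8000:8000" port mapping) and start it through vllm.entrypoints.openai.api_server, so it exposes an OpenAI-compatible HTTP API. A minimal client sketch, assuming the container is up, the server is reachable on localhost:8000, and the same key passed to the script via --api_key (empty in the generated commands) is reused by the client:

import requests

API_KEY = ""  # whatever was passed via --api_key; empty means no auth header
BASE_URL = "http://localhost:8000"  # host side of the 8000:8000 port mapping

response = requests.post(
    f"{BASE_URL}/v1/completions",
    headers={"Authorization": f"Bearer {API_KEY}"} if API_KEY else {},
    json={
        "model": "NousResearch/Hermes-2-Theta-Llama-3-8B",
        "prompt": "Hello from the CM vllm-server script!",
        "max_tokens": 32,
    },
    timeout=60,
)
print(response.json())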
diff --git a/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.run.bat b/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.run.bat
deleted file mode 100644
index e09e482494..0000000000
--- a/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.run.bat
+++ /dev/null
@@ -1 +0,0 @@
-docker run -it --entrypoint '' --gpus=all --ulimit memlock=-1 -p 8000:8000 cknowledge/cm-script-run-vllm-server:ubuntu-22.04-latest bash -c '(cm run script --tags=run,vllm-server --model=NousResearch/Hermes-2-Theta-Llama-3-8B --api_key= --adr.cuda.version=12.4.1 && bash ) || bash'
diff --git a/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.run.sh b/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.run.sh
deleted file mode 100644
index e09e482494..0000000000
--- a/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.run.sh
+++ /dev/null
@@ -1 +0,0 @@
-docker run -it --entrypoint '' --gpus=all --ulimit memlock=-1 -p 8000:8000 cknowledge/cm-script-run-vllm-server:ubuntu-22.04-latest bash -c '(cm run script --tags=run,vllm-server --model=NousResearch/Hermes-2-Theta-Llama-3-8B --api_key= --adr.cuda.version=12.4.1 && bash ) || bash'
diff --git a/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile b/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile
deleted file mode 100644
index 2db21f4ae4..0000000000
--- a/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile
+++ /dev/null
@@ -1,45 +0,0 @@
-FROM nvcr.io/nvidia/pytorch:24.06-py3
-
-# Automatically generated by the CM workflow automation meta-framework
-# https://github.com/mlcommons/ck
-
-LABEL github=""
-LABEL maintainer=""
-LABEL license=""
-
-SHELL ["/bin/bash", "-c"]
-
-ARG UID=1000
-ARG GID=1000
-ARG CM_GH_TOKEN
-
-
-# Notes: https://runnable.com/blog/9-common-dockerfile-mistakes
-# Install system dependencies
-RUN apt-get update -y
-RUN apt-get install -y python3 python3-pip git sudo wget python3-venv
-
-# Setup docker environment
-ENTRYPOINT ["/bin/bash", "-c"]
-ENV TZ="US/Pacific"
-ENV PATH="${PATH}:/home/cmuser/.local/bin"
-RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ >/etc/timezone
-
-# Setup docker user
-RUN groupadd -g $GID -o cm
-RUN useradd -m -u $UID -g $GID -o --create-home --shell /bin/bash cmuser
-RUN echo "cmuser ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers
-USER cmuser:cm
-WORKDIR /home/cmuser
-
-# Install python packages
-RUN python3 -m pip install --user cmind requests giturlparse tabulate
-
-# Download CM repo for scripts
-RUN cm pull repo anandhu-eng@cm4mlops
-
-# Install all system dependencies
-RUN cm run script --tags=get,sys-utils-cm --quiet
-
-# Run commands
-RUN cm run script --tags=run,vllm-server --model=NousResearch/Hermes-2-Theta-Llama-3-8B --api_key= --adr.cuda.version=12.4.1 --skip_docker_model_download=True --quiet --fake_run --env.CM_RUN_STATE_DOCKER=True
diff --git a/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.build.bat b/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.build.bat
deleted file mode 100644
index b13a780050..0000000000
--- a/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.build.bat
+++ /dev/null
@@ -1,5 +0,0 @@
-docker build --no-cache ^
-  --build-arg GID=\" $(id -g $USER) \" --build-arg UID=\" $(id -u $USER) \" ^
-  -f "/home/anandhu/CM/repos/anandhu-eng@cm4mlops/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile" ^
-  -t "cknowledge/cm-script-run-vllm-server:ubuntu-22.04-latest" ^
-  .
diff --git a/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.build.sh b/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.build.sh
deleted file mode 100644
index 71754a9cf1..0000000000
--- a/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.build.sh
+++ /dev/null
@@ -1,5 +0,0 @@
-docker build --no-cache \
-  --build-arg GID=\" $(id -g $USER) \" --build-arg UID=\" $(id -u $USER) \" \
-  -f "/home/anandhu/CM/repos/anandhu-eng@cm4mlops/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile" \
-  -t "cknowledge/cm-script-run-vllm-server:ubuntu-22.04-latest" \
-  .
diff --git a/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.run.bat b/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.run.bat
deleted file mode 100644
index 24e904e9f1..0000000000
--- a/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.run.bat
+++ /dev/null
@@ -1 +0,0 @@
-docker run -it --entrypoint '' --gpus=all --ulimit memlock=-1 -p 8000:8000 cknowledge/cm-script-run-vllm-server:ubuntu-22.04-latest bash -c '(cm run script --tags=run,vllm-server --model=NousResearch/Hermes-2-Theta-Llama-3-8B --api_key= --adr.cuda.version=12.4.1 --skip_docker_model_download=True && bash ) || bash'
diff --git a/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.run.sh b/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.run.sh
deleted file mode 100644
index 24e904e9f1..0000000000
--- a/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.run.sh
+++ /dev/null
@@ -1 +0,0 @@
-docker run -it --entrypoint '' --gpus=all --ulimit memlock=-1 -p 8000:8000 cknowledge/cm-script-run-vllm-server:ubuntu-22.04-latest bash -c '(cm run script --tags=run,vllm-server --model=NousResearch/Hermes-2-Theta-Llama-3-8B --api_key= --adr.cuda.version=12.4.1 --skip_docker_model_download=True && bash ) || bash'
diff --git a/script/run-vllm-server/dockerfiles/tmp-run.sh b/script/run-vllm-server/dockerfiles/tmp-run.sh
deleted file mode 100755
index 65c5270f0e..0000000000
--- a/script/run-vllm-server/dockerfiles/tmp-run.sh
+++ /dev/null
@@ -1,42 +0,0 @@
-#!/bin/bash
-
-export CM_DOCKER_BUILD_ARGS="GID=\" $(id -g $USER) \" UID=\" $(id -u $USER) \" ${CM_DOCKER_BUILD_ARGS}"
-export CM_BUILD_DOCKERFILE="no"
-export CM_DOCKERFILE_WITH_PATH="/home/anandhu/CM/repos/anandhu-eng@cm4mlops/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile"
-export CM_DOCKER_ADD_ALL_GPUS="True"
-export CM_DOCKER_BUILD_ARGS="--build-arg GID=\" $(id -g $USER) \" --build-arg UID=\" $(id -u $USER) \""
-export CM_DOCKER_BUILD_CMD="docker build --no-cache --build-arg GID=\" $(id -g $USER) \" --build-arg UID=\" $(id -u $USER) \" -f "/home/anandhu/CM/repos/anandhu-eng@cm4mlops/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile" -t "cknowledge/cm-script-run-vllm-server:ubuntu-22.04-latest" ."
-export CM_DOCKER_CACHE="no"
-export CM_DOCKER_CACHE_ARG=" --no-cache"
-export CM_DOCKER_DETACHED_MODE="no"
-export CM_DOCKER_EXTRA_RUN_ARGS=" --ulimit memlock=-1"
-export CM_DOCKER_IMAGE_BASE="ubuntu:22.04"
-export CM_DOCKER_IMAGE_NAME="cm-script-run-vllm-server"
-export CM_DOCKER_IMAGE_RECREATE="yes"
-export CM_DOCKER_IMAGE_REPO="cknowledge"
-export CM_DOCKER_IMAGE_TAG="ubuntu-22.04-latest"
-export CM_DOCKER_IMAGE_TAG_EXTRA="-latest"
-export CM_DOCKER_INTERACTIVE_MODE="True"
-export CM_DOCKER_OS="ubuntu"
-export CM_DOCKER_OS_VERSION="22.04"
-export CM_DOCKER_PORT_MAPS="['8000:8000']"
-export CM_DOCKER_PRE_RUN_COMMANDS="[]"
-export CM_DOCKER_RUN_CMD="cm run script --tags=run,vllm-server --model=NousResearch/Hermes-2-Theta-Llama-3-8B --api_key= --adr.cuda.version=12.4.1 --skip_docker_model_download=True --docker_run_deps "
-export CM_DOCKER_RUN_SCRIPT_TAGS="run,server,vllm,vllm-server"
-export CM_DOCKER_VOLUME_MOUNTS="[]"
-export CM_MLOPS_REPO="anandhu-eng@cm4mlops"
-export CM_QUIET="yes"
-export CM_REAL_RUN="True"
-export CM_RUN_STATE_DOCKER="True"
-export CM_TMP_CURRENT_PATH="/home/anandhu/CM/repos/anandhu-eng@cm4mlops/script/run-vllm-server"
-export CM_TMP_CURRENT_SCRIPT_PATH="/home/anandhu/CM/repos/anandhu-eng@cm4mlops/script/build-docker-image"
-export CM_TMP_CURRENT_SCRIPT_REPO_PATH="/home/anandhu/CM/repos/anandhu-eng@cm4mlops"
-export CM_TMP_CURRENT_SCRIPT_REPO_PATH_WITH_PREFIX="/home/anandhu/CM/repos/anandhu-eng@cm4mlops"
-export CM_TMP_CURRENT_SCRIPT_WORK_PATH="/home/anandhu/CM/repos/anandhu-eng@cm4mlops/script/run-vllm-server/dockerfiles"
-export CM_TMP_PIP_VERSION_STRING=""
-export CM_VLLM_SERVER_API_KEY=""
-export CM_VLLM_SERVER_MODEL_NAME="NousResearch/Hermes-2-Theta-Llama-3-8B"
-export CM_VLLM_SKIP_DOCKER_MODEL_DOWNLOAD="True"
-
-
-. "/home/anandhu/CM/repos/anandhu-eng@cm4mlops/script/build-docker-image/run.sh"
diff --git a/script/run-vllm-server/dockerfiles/tmp-state.json b/script/run-vllm-server/dockerfiles/tmp-state.json
deleted file mode 100644
index e03109d44a..0000000000
--- a/script/run-vllm-server/dockerfiles/tmp-state.json
+++ /dev/null
@@ -1,3 +0,0 @@
-{
-  "docker": {}
-}
diff --git a/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile b/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile
deleted file mode 100644
index 29c6ef0775..0000000000
--- a/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile
+++ /dev/null
@@ -1,45 +0,0 @@
-FROM ubuntu:22.04
-
-# Automatically generated by the CM workflow automation meta-framework
-# https://github.com/mlcommons/ck
-
-LABEL github=""
-LABEL maintainer=""
-LABEL license=""
-
-SHELL ["/bin/bash", "-c"]
-
-ARG UID=1000
-ARG GID=1000
-ARG CM_GH_TOKEN
-
-
-# Notes: https://runnable.com/blog/9-common-dockerfile-mistakes
-# Install system dependencies
-RUN apt-get update -y
-RUN apt-get install -y python3 python3-pip git sudo wget python3-venv
-
-# Setup docker environment
-ENTRYPOINT ["/bin/bash", "-c"]
-ENV TZ="US/Pacific"
-ENV PATH="${PATH}:/home/cmuser/.local/bin"
-RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ >/etc/timezone
-
-# Setup docker user
-RUN groupadd -g $GID -o cm
-RUN useradd -m -u $UID -g $GID -o --create-home --shell /bin/bash cmuser
-RUN echo "cmuser ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers
-USER cmuser:cm
-WORKDIR /home/cmuser
-
-# Install python packages
-RUN python3 -m pip install --user cmind requests giturlparse tabulate
-
-# Download CM repo for scripts
-RUN cm pull repo anandhu-eng@cm4mlops
-
-# Install all system dependencies
-RUN cm run script --tags=get,sys-utils-cm --quiet
-
-# Run commands
-RUN cm run script --tags=run,vllm-server --model=NousResearch/Hermes-2-Theta-Llama-3-8B --api_key= --quiet --fake_run --env.CM_RUN_STATE_DOCKER=True
diff --git a/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.build.bat b/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.build.bat
deleted file mode 100644
index 84acbf7833..0000000000
--- a/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.build.bat
+++ /dev/null
@@ -1,5 +0,0 @@
-docker build --no-cache ^
-  --build-arg GID=\" $(id -g $USER) \" --build-arg UID=\" $(id -u $USER) \" ^
-  -f "/home/anandhu/CM/repos/anandhu-eng@cm4mlops/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile" ^
-  -t "cknowledge/cm-script-run-vllm-server:ubuntu-22.04-latest" ^
-  .
diff --git a/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.build.sh b/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.build.sh
deleted file mode 100644
index 29217c2769..0000000000
--- a/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.build.sh
+++ /dev/null
@@ -1,5 +0,0 @@
-docker build --no-cache \
-  --build-arg GID=\" $(id -g $USER) \" --build-arg UID=\" $(id -u $USER) \" \
-  -f "/home/anandhu/CM/repos/anandhu-eng@cm4mlops/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile" \
-  -t "cknowledge/cm-script-run-vllm-server:ubuntu-22.04-latest" \
-  .
diff --git a/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.run.bat b/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.run.bat
deleted file mode 100644
index 4ef7c0806b..0000000000
--- a/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.run.bat
+++ /dev/null
@@ -1 +0,0 @@
-docker run --entrypoint '' -p 8000:8000 cknowledge/cm-script-run-vllm-server:ubuntu-22.04-latest bash -c 'cm run script --tags=run,vllm-server --model=NousResearch/Hermes-2-Theta-Llama-3-8B --api_key= '
diff --git a/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.run.sh b/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.run.sh
deleted file mode 100644
index 4ef7c0806b..0000000000
--- a/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.run.sh
+++ /dev/null
@@ -1 +0,0 @@
-docker run --entrypoint '' -p 8000:8000 cknowledge/cm-script-run-vllm-server:ubuntu-22.04-latest bash -c 'cm run script --tags=run,vllm-server --model=NousResearch/Hermes-2-Theta-Llama-3-8B --api_key= '

From 5ad392405dfd4bdaefbc7b569f7500f5e4016aee Mon Sep 17 00:00:00 2001
From: anandhu-eng
Date: Wed, 17 Jul 2024 23:23:37 +0530
Subject: [PATCH 6/7] delete temp file

---
 script/run-vllm-server/tmp-state.json | 3 ---
 1 file changed, 3 deletions(-)
 delete mode 100644 script/run-vllm-server/tmp-state.json

diff --git a/script/run-vllm-server/tmp-state.json b/script/run-vllm-server/tmp-state.json
deleted file mode 100644
index e03109d44a..0000000000
--- a/script/run-vllm-server/tmp-state.json
+++ /dev/null
@@ -1,3 +0,0 @@
-{
-  "docker": {}
-}

From 87a8bb58b98bc27f1dff53a3c00d01e8a28d2536 Mon Sep 17 00:00:00 2001
From: Arjun Suresh
Date: Wed, 17 Jul 2024 21:11:40 +0100
Subject: [PATCH 7/7] Support criteo preprocessed dataset from MLC

---
 .../get-preprocessed-dataset-criteo/_cm.json | 146 ++++++++++++++++--
 .../customize.py                             |   9 ++
 2 files changed, 138 insertions(+), 17 deletions(-)

diff --git a/script/get-preprocessed-dataset-criteo/_cm.json b/script/get-preprocessed-dataset-criteo/_cm.json
index 38af6754d0..ff22891bb4 100644
--- a/script/get-preprocessed-dataset-criteo/_cm.json
+++ b/script/get-preprocessed-dataset-criteo/_cm.json
@@ -31,37 +31,87 @@
       "names": [
[ "dlrm-src" ], - "tags": "get,dlrm,src" + "tags": "get,dlrm,src", + "skip_if_env": { + "CM_DATASET_PREPROCESSED_PATH": [ + "on" + ] + } }, { "names": [ "inference-src" ], - "tags": "mlperf,mlcommons,inference,source,src" + "tags": "mlperf,mlcommons,inference,source,src", + "skip_if_env": { + "CM_DATASET_PREPROCESSED_PATH": [ + "on" + ] + } }, { - "tags": "get,generic-python-lib,_scikit-learn" + "tags": "get,generic-python-lib,_scikit-learn", + "skip_if_env": { + "CM_DATASET_PREPROCESSED_PATH": [ + "on" + ] + } }, { - "tags": "get,generic-python-lib,_torch" + "tags": "get,generic-python-lib,_torch", + "skip_if_env": { + "CM_DATASET_PREPROCESSED_PATH": [ + "on" + ] + } }, { - "tags": "get,generic-python-lib,_opencv-python" + "tags": "get,generic-python-lib,_opencv-python", + "skip_if_env": { + "CM_DATASET_PREPROCESSED_PATH": [ + "on" + ] + } }, { - "tags": "get,generic-python-lib,_decorator" + "tags": "get,generic-python-lib,_decorator", + "skip_if_env": { + "CM_DATASET_PREPROCESSED_PATH": [ + "on" + ] + } }, { - "tags": "get,generic-python-lib,_psutil" + "tags": "get,generic-python-lib,_psutil", + "skip_if_env": { + "CM_DATASET_PREPROCESSED_PATH": [ + "on" + ] + } }, { - "tags": "get,generic-python-lib,_onnx" + "tags": "get,generic-python-lib,_onnx", + "skip_if_env": { + "CM_DATASET_PREPROCESSED_PATH": [ + "on" + ] + } }, { - "tags": "get,generic-python-lib,_tqdm" + "tags": "get,generic-python-lib,_tqdm", + "skip_if_env": { + "CM_DATASET_PREPROCESSED_PATH": [ + "on" + ] + } }, { - "tags": "get,generic-python-lib,_mlperf_logging" + "tags": "get,generic-python-lib,_mlperf_logging", + "skip_if_env": { + "CM_DATASET_PREPROCESSED_PATH": [ + "on" + ] + } } ], "input_mapping": { @@ -70,7 +120,8 @@ "threads": "CM_NUM_PREPROCESS_THREADS" }, "new_env_keys": [ - "CM_DATASET_*" + "CM_DATASET_*", + "CM_CRITEO_PREPROCESSED_PATH" ], "tags": [ "get", @@ -118,6 +169,37 @@ "CM_CRITEO_FAKE": "yes" } }, + "mlc": { + "group": "src", + "env": { + "CM_DATASET_PREPROCESSED_CRITEO_FROM_MLC": "yes", + "CM_DATASET_PREPROCESSED_PATH": "on" + } + }, + "multihot,mlc": { + "env": { + "CM_RCLONE_CONFIG_CMD": "rclone config create mlc-inference s3 provider=Cloudflare access_key_id=f65ba5eef400db161ea49967de89f47b secret_access_key=fbea333914c292b854f14d3fe232bad6c5407bf0ab1bebf78833c2b359bdfd2b endpoint=https://c2686074cb2caf5cbaf6d134bdba8b47.r2.cloudflarestorage.com", + "CM_RCLONE_URL": "mlc-inference:mlcommons-inference-wg-public/dlrm_preprocessed", + "CM_DOWNLOAD_FINAL_ENV_NAME": "CM_DATASET_PREPROCESSED_PATH", + "CM_EXTRACT_FINAL_ENV_NAME": "CM_DATASET_PREPROCESSED_PATH", + "CM_EXTRACT_TO_FOLDER": "criteo-preprocessed" + }, + "deps": [ + { + "tags": "download-and-extract,_rclone", + "update_tags_from_env_with_prefix": { + "_url.": [ + "CM_RCLONE_URL" + ] + }, + "force_cache": true, + "names": [ + "dae" + ], + "extra_cache_tags": "criteo,preprocessed,dataset" + } + ] + }, "multihot": { "group": "type", "default": true, @@ -130,22 +212,52 @@ "names": [ "mlperf-training", "training-src" - ] + ], + "skip_if_env": { + "CM_DATASET_PREPROCESSED_CRITEO_FROM_MLC": [ + "yes" + ] + } }, { - "tags": "get,generic-python-lib,_package.typing_inspect" + "tags": "get,generic-python-lib,_package.typing_inspect", + "skip_if_env": { + "CM_DATASET_PREPROCESSED_CRITEO_FROM_MLC": [ + "yes" + ] + } }, { - "tags": "get,generic-python-lib,_package.iopath" + "tags": "get,generic-python-lib,_package.iopath", + "skip_if_env": { + "CM_DATASET_PREPROCESSED_CRITEO_FROM_MLC": [ + "yes" + ] + } }, { - "tags": 
"get,generic-python-lib,_package.fbgemm_gpu" + "tags": "get,generic-python-lib,_package.fbgemm_gpu", + "skip_if_env": { + "CM_DATASET_PREPROCESSED_CRITEO_FROM_MLC": [ + "yes" + ] + } }, { - "tags": "get,generic-python-lib,_package.torchrec" + "tags": "get,generic-python-lib,_package.torchrec", + "skip_if_env": { + "CM_DATASET_PREPROCESSED_CRITEO_FROM_MLC": [ + "yes" + ] + } }, { - "tags": "get,generic-python-lib,_package.pyre_extensions" + "tags": "get,generic-python-lib,_package.pyre_extensions", + "skip_if_env": { + "CM_DATASET_PREPROCESSED_CRITEO_FROM_MLC": [ + "yes" + ] + } } ] } diff --git a/script/get-preprocessed-dataset-criteo/customize.py b/script/get-preprocessed-dataset-criteo/customize.py index d6826e38cb..b1f2d03bc9 100644 --- a/script/get-preprocessed-dataset-criteo/customize.py +++ b/script/get-preprocessed-dataset-criteo/customize.py @@ -29,3 +29,12 @@ def preprocess(i): print("Using MLCommons Training source from '" + env['CM_MLPERF_TRAINING_SOURCE'] +"'") return {'return': 0} + +def postprocess(i): + + env = i['env'] + env['CM_CRITEO_PREPROCESSED_PATH'] = env['CM_DATASET_PREPROCESSED_PATH'] + + env['CM_GET_DEPENDENT_CACHED_PATH'] = env['CM_CRITEO_PREPROCESSED_PATH'] + + return {'return': 0}