From 6a81cd32df11541d47f328f5e12ed5bf567e0342 Mon Sep 17 00:00:00 2001
From: anandhu-eng
Date: Mon, 18 Nov 2024 14:28:49 +0530
Subject: [PATCH 1/2] add gnn dataset download script

---
 .../README.md                                 |  1 +
 .../get-dataset-mlperf-inference-gnn/_cm.yaml | 48 ++++++++++++++
 .../customize.py                              | 62 +++++++++++++++++++
 .../get-dataset-mlperf-inference-gnn/run.sh   | 26 ++++++++
 4 files changed, 137 insertions(+)
 create mode 100644 script/get-dataset-mlperf-inference-gnn/README.md
 create mode 100644 script/get-dataset-mlperf-inference-gnn/_cm.yaml
 create mode 100644 script/get-dataset-mlperf-inference-gnn/customize.py
 create mode 100644 script/get-dataset-mlperf-inference-gnn/run.sh

diff --git a/script/get-dataset-mlperf-inference-gnn/README.md b/script/get-dataset-mlperf-inference-gnn/README.md
new file mode 100644
index 0000000000..76db0d5a18
--- /dev/null
+++ b/script/get-dataset-mlperf-inference-gnn/README.md
@@ -0,0 +1 @@
+Please see [https://docs.mlcommons.org/cm4mlops/scripts//get-dlrm-data-mlperf-inference](https://docs.mlcommons.org/cm4mlops/scripts//get-dlrm-data-mlperf-inference) for the documentation of this CM script.
diff --git a/script/get-dataset-mlperf-inference-gnn/_cm.yaml b/script/get-dataset-mlperf-inference-gnn/_cm.yaml
new file mode 100644
index 0000000000..d56bfa9806
--- /dev/null
+++ b/script/get-dataset-mlperf-inference-gnn/_cm.yaml
@@ -0,0 +1,48 @@
+alias: get-dataset-mlperf-inference-gnn
+automation_alias: script
+automation_uid: 5b4e0237da074764
+cache: true
+tags:
+- get
+- dataset
+- mlperf
+- gnn
+- icbh  # historical misspelling of "igbh"; kept so existing tag lookups still match
+- igbh
+- inference
+uid: 824e61316c074253
+# new_env_keys:
+input_mapping:
+  out_path: CM_IGBH_DATASET_OUT_PATH
+deps:
+  - tags: mlperf,inference,source
+    names:
+      - inference-src
+  - tags: get,python
+    names:
+      - get-python
+variations:
+  debug:
+    default: true
+    group: dataset-type
+    env:
+      CM_IGBH_DATASET_TYPE: debug
+      CM_IGBH_DATASET_SIZE: tiny
+  full:
+    group: dataset-type
+    env:
+      CM_IGBH_DATASET_TYPE: full
+      CM_IGBH_DATASET_SIZE: full
+  glt:
+    env:
+      # Quoted so YAML 1.1 loaders keep it a string instead of the boolean true.
+      CM_IGBH_GRAPH_COMPRESS: "yes"
+  csc:
+    group: compressed-layout
+    default: true
+    env:
+      CM_IGBH_GRAPH_COMPRESS_LAYOUT: csc
+  csr:
+    group: compressed-layout
+    env:
+      CM_IGBH_GRAPH_COMPRESS_LAYOUT: csr
diff --git a/script/get-dataset-mlperf-inference-gnn/customize.py b/script/get-dataset-mlperf-inference-gnn/customize.py
new file mode 100644
index 0000000000..192144f602
--- /dev/null
+++ b/script/get-dataset-mlperf-inference-gnn/customize.py
@@ -0,0 +1,62 @@
+from cmind import utils
+import os
+import shlex
+
+def preprocess(i):
+    """Compose the shell command that downloads and prepares the IGBH dataset.
+
+    Reads i['os_info'] and i['env'], and stores the command in
+    env['CM_RUN_CMD'] for run.sh to execute.
+
+    Returns {'return': 0} on success, or {'return': 1, 'error': ...} on Windows.
+    """
+
+    os_info = i['os_info']
+
+    env = i['env']
+
+    if os_info['platform'] == "windows":
+        return {'return':1, 'error': 'Script not supported in windows yet!'}
+
+    print("Using MLCommons Inference source from '" + env['CM_MLPERF_INFERENCE_SOURCE'] +"'")
+
+    # run.sh passes CM_RUN_CMD to `eval`, so every interpolated value is
+    # shell-quoted to survive spaces and metacharacters in paths.
+    python_bin = shlex.quote(env['CM_PYTHON_BIN_WITH_PATH'])
+    graph_folder = shlex.quote(os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], 'graph', 'R-GAT'))
+    download_loc = shlex.quote(env.get('CM_IGBH_DATASET_OUT_PATH', os.getcwd()))
+    dataset_size = shlex.quote(env['CM_IGBH_DATASET_SIZE'])
+
+    #run cmd
+    run_cmd = f"cd {graph_folder} "
+    x_sep = " && "
+
+    # download the dataset
+    if env['CM_IGBH_DATASET_TYPE'] == "debug":
+        run_cmd += x_sep + f"{python_bin} tools/download_igbh_test.py --target-path {download_loc}"
+    else:
+        run_cmd += x_sep + f"./tools/download_igbh_full.sh {download_loc}"
+
+    # split seeds
+    run_cmd += x_sep + f"{python_bin} tools/split_seeds.py --path {download_loc} --dataset_size {dataset_size}"
+
+    # compress graph(for glt implementation)
+    # The flag may arrive as a YAML boolean rather than the string "yes".
+    if str(env.get('CM_IGBH_GRAPH_COMPRESS', '')).lower() in ("yes", "true", "1"):
+        layout = shlex.quote(env['CM_IGBH_GRAPH_COMPRESS_LAYOUT'])
+        run_cmd += x_sep + f"{python_bin} tools/compress_graph.py --path {download_loc} --dataset_size {dataset_size} --layout {layout}"
+
+    env['CM_RUN_CMD'] = run_cmd
+
+    return {'return':0}
+
+def postprocess(i):
+    """Record the dataset location in env['CM_IGBH_DATASET_PATH'] and print it."""
+
+    env = i['env']
+
+    env['CM_IGBH_DATASET_PATH'] = env.get('CM_IGBH_DATASET_OUT_PATH', os.getcwd())
+
+    print(f"Path to the IGBH dataset: {os.path.join(env['CM_IGBH_DATASET_PATH'], env['CM_IGBH_DATASET_SIZE'])}")
+
+    return {'return':0}
diff --git a/script/get-dataset-mlperf-inference-gnn/run.sh b/script/get-dataset-mlperf-inference-gnn/run.sh
new file mode 100644
index 0000000000..2386521602
--- /dev/null
+++ b/script/get-dataset-mlperf-inference-gnn/run.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+#CM Script location: ${CM_TMP_CURRENT_SCRIPT_PATH}
+
+#To export any variable
+#echo "VARIABLE_NAME=VARIABLE_VALUE" >>tmp-run-env.out
+
+#${CM_PYTHON_BIN_WITH_PATH} contains the path to python binary if "get,python" is added as a dependency
+
+
+
+function exit_if_error() {
+  # Capture the status first: `test` overwrites $?, so `exit $?` would always exit with 1.
+  local rc=$?
+  test $rc -eq 0 || exit $rc
+}
+
+function run() {
+  echo "Running: "
+  echo "$1"
+  echo ""
+  eval "$1"
+  exit_if_error
+}
+
+run "$CM_RUN_CMD"
From 80ee9faef04950d864033c857c4f89185b4230a5 Mon Sep 17 00:00:00 2001
From: anandhu-eng
Date: Mon, 18 Nov 2024 14:29:55 +0530
Subject: [PATCH 2/2] dlt readme

---
 script/get-dataset-mlperf-inference-gnn/README.md | 1 -
 1 file changed, 1 deletion(-)
 delete mode 100644 script/get-dataset-mlperf-inference-gnn/README.md

diff --git a/script/get-dataset-mlperf-inference-gnn/README.md b/script/get-dataset-mlperf-inference-gnn/README.md
deleted file mode 100644
index 76db0d5a18..0000000000
--- a/script/get-dataset-mlperf-inference-gnn/README.md
+++ /dev/null
@@ -1 +0,0 @@
-Please see [https://docs.mlcommons.org/cm4mlops/scripts//get-dlrm-data-mlperf-inference](https://docs.mlcommons.org/cm4mlops/scripts//get-dlrm-data-mlperf-inference) for the documentation of this CM script.