From b105c4ea2a570d0a114811d5acfb6e61aae997bc Mon Sep 17 00:00:00 2001
From: vprashar2929
Date: Sun, 10 Nov 2024 18:17:57 +0530
Subject: [PATCH] chore: remove outdated references to cmd/main.py

This commit removes the obsolete references to `cmd/main.py`, switching the
training script, Tekton tasks, and docs to the `kepler-model` entrypoint so
the codebase matches the current state of the project.

Signed-off-by: vprashar2929
---
 model_training/README.md                      |   5 +-
 model_training/script.sh                      | 408 +++++++++---------
 model_training/tekton/pipelines/collect.yaml  |   6 +-
 .../tekton/pipelines/complete-train.yaml      |   6 +-
 .../tekton/pipelines/single-train.yaml        |   6 +-
 model_training/tekton/tasks/extract-task.yaml |   3 +-
 model_training/tekton/tasks/isolate-task.yaml |   3 +-
 .../tekton/tasks/original-pipeline-task.yaml  |   3 +-
 model_training/tekton/tasks/train-task.yaml   |   3 +-
 src/kepler_model/train/ec2_pipeline.py        |   6 +-
 10 files changed, 220 insertions(+), 229 deletions(-)

diff --git a/model_training/README.md b/model_training/README.md
index 1ff38f54..bf5e5ebe 100644
--- a/model_training/README.md
+++ b/model_training/README.md
@@ -18,9 +18,10 @@
 ## Requirements

 - git > 2.22
+- hatch
 - kubectl
 - yq, jq
-- power meter is available
+- power meter (if available)

 ## Pre-step

@@ -71,7 +72,7 @@ There are two options to run the benchmark and collect the metrics, [CPE-operato

 ### With manual execution

-In addition to the above two automation approach, you can manually run your own benchmarks, then collect, train, and export the models by the entrypoint `cmd/main.py`
+In addition to the above two automation approaches, you can manually run your own benchmarks, then collect, train, and export the models with the entrypoint:

 [Manual Metric Collection and Training with Entrypoint](./cmd_instruction.md)

diff --git a/model_training/script.sh b/model_training/script.sh
index a71f47eb..59e6e162 100755
--- a/model_training/script.sh
+++ b/model_training/script.sh
@@ -26,299 +26,299 @@ declare ENTRYPOINT_IMG=${ENTRYPOINT_IMG:-"quay.io/sustainable_computing_io/keple
 declare PROMETHEUS_ENABLE=${PROMETHEUS_ENABLE:-"true"}
 declare TEKTON_ENABLE=${TEKTON_ENABLE:-"true"}

-declare NATIVE=${NATIVE:-"true"}
+declare NATIVE=${NATIVE:-"false"}

 clone_local_dev_cluster() {
-    [[ -d "$LOCAL_DEV_CLUSTER_DIR" ]] && {
-        echo "using local local-dev-cluster"
-        return 0
-    }
-
-    echo "downloading local-dev-cluster"
-    git clone -b "$LOCAL_DEV_CLUSTER_VERSION" \
-        https://github.com/sustainable-computing-io/local-dev-cluster.git \
-        --depth=1 \
-        "$LOCAL_DEV_CLUSTER_DIR"
-    return $?
+    [[ -d "$LOCAL_DEV_CLUSTER_DIR" ]] && {
+        echo "using local local-dev-cluster"
+        return 0
+    }
+
+    echo "downloading local-dev-cluster"
+    git clone -b "$LOCAL_DEV_CLUSTER_VERSION" \
+        https://github.com/sustainable-computing-io/local-dev-cluster.git \
+        --depth=1 \
+        "$LOCAL_DEV_CLUSTER_DIR"
+    return $?
 }

 rollout_ns_status() {
-    local ns="$1"
-    shift 1
-    local resources=""
-    resources=$(kubectl get deployments,statefulsets,daemonsets -n="$ns" -o name)
-    for res in $resources; do
-        kubectl rollout status "$res" --namespace "$ns" --timeout=10m || {
-            echo "failed to check status of $res inside namespace $ns"
-            return 1
-        }
-    done
-    return 0
+    local ns="$1"
+    shift 1
+    local resources=""
+    resources=$(kubectl get deployments,statefulsets,daemonsets -n="$ns" -o name)
+    for res in $resources; do
+        kubectl rollout status "$res" --namespace "$ns" --timeout=10m || {
+            echo "failed to check status of $res inside namespace $ns"
+            return 1
+        }
+    done
+    return 0
 }

 cluster_up() {
-    clone_local_dev_cluster
-    cd "$LOCAL_DEV_CLUSTER_DIR"
-    "$LOCAL_DEV_CLUSTER_DIR/main.sh" up
-    cd "$PROJECT_ROOT/model_training"
+    clone_local_dev_cluster
+    cd "$LOCAL_DEV_CLUSTER_DIR"
+    "$LOCAL_DEV_CLUSTER_DIR/main.sh" up
+    cd "$PROJECT_ROOT/model_training"
 }

 cluster_down() {
-    cd "$LOCAL_DEV_CLUSTER_DIR"
-    "$LOCAL_DEV_CLUSTER_DIR/main.sh" down
+    cd "$LOCAL_DEV_CLUSTER_DIR"
+    "$LOCAL_DEV_CLUSTER_DIR/main.sh" down
 }

 deploy_kepler() {
-    kubectl apply -f "$DEPOYMENT_DIR"/kepler.yaml
-    rollout_ns_status kepler
+    kubectl apply -f "$DEPOYMENT_DIR"/kepler.yaml
+    rollout_ns_status kepler
 }

 clean_deployment() {
-    kubectl delete -f "$DEPOYMENT_DIR"
+    kubectl delete -f "$DEPOYMENT_DIR"
 }

 clean_cpe_cr() {
-    kubectl delete -f "$CPE_BENCHMARK_DIR" || true
-    kubectl delete -f "$DEPOYMENT_DIR"/cpe-operator.yaml || true
+    kubectl delete -f "$CPE_BENCHMARK_DIR" || true
+    kubectl delete -f "$DEPOYMENT_DIR"/cpe-operator.yaml || true
 }

 deploy_cpe_operator() {
-    docker exec --privileged "$KIND_CLUSTER_NAME"-control-plane mkdir -p /cpe-local-log
-    docker exec --privileged "$KIND_CLUSTER_NAME"-control-plane chmod 777 /cpe-local-log
-    kubectl create -f "$DEPOYMENT_DIR"/cpe-operator.yaml
-    timeout 60s bash -c 'until kubectl get crd benchmarkoperators.cpe.cogadvisor.io 2>/dev/null; do sleep 1; done'
-    rollout_ns_status cpe-operator-system
+    docker exec --privileged "$KIND_CLUSTER_NAME"-control-plane mkdir -p /cpe-local-log
+    docker exec --privileged "$KIND_CLUSTER_NAME"-control-plane chmod 777 /cpe-local-log
+    kubectl create -f "$DEPOYMENT_DIR"/cpe-operator.yaml
+    timeout 60s bash -c 'until kubectl get crd benchmarkoperators.cpe.cogadvisor.io 2>/dev/null; do sleep 1; done'
+    rollout_ns_status cpe-operator-system
 }

 reload_prometheus() {
-    sleep 5
-    curl -X POST localhost:9090/-/reload
+    sleep 5
+    curl -X POST localhost:9090/-/reload
 }

 expect_num() {
-    local benchmark="$1"
-    local benchmark_ns="$2"
-    shift 2
-    kubectl get benchmark "$benchmark" -n "$benchmark_ns" -oyaml >tmp.yaml
-    local benchmark_file="tmp.yaml"
-    num=$(yq ".spec.repetition" <"$benchmark_file")
-    [[ -z "$num" ]] && num=1
-
-    for v in $(yq eval ".spec.iterationSpec.iterations[].values | length" <"$benchmark_file"); do
-        ((num *= v))
-    done
-    rm "tmp.yaml"
-    echo "$num"
+    local benchmark="$1"
+    local benchmark_ns="$2"
+    shift 2
+    kubectl get benchmark "$benchmark" -n "$benchmark_ns" -oyaml >tmp.yaml
+    local benchmark_file="tmp.yaml"
+    num=$(yq ".spec.repetition" <"$benchmark_file")
+    [[ -z "$num" ]] && num=1
+
+    for v in $(yq eval ".spec.iterationSpec.iterations[].values | length" <"$benchmark_file"); do
+        ((num *= v))
+    done
+    rm "tmp.yaml"
+    echo "$num"
 }

 wait_for_benchmark() {
-    local benchmark="$1"
-    local benchmark_ns="$2"
-    local sleep_time="$3"
-    local expect_num=""
-    local jobCompleted=""
-
-    expect_num=$(expect_num "$benchmark" "$benchmark_ns")
-    jobCompleted=$(kubectl get benchmark "$benchmark" -n "$benchmark_ns" -ojson | jq -r .status.jobCompleted)
-    echo "Wait for $expect_num $benchmark jobs to be completed, sleep $sleep_time"
-
-    while [ "$jobCompleted" != "$expect_num/$expect_num" ]; do
-        sleep "$sleep_time"
-        echo "Wait for $benchmark to be completed... $jobCompleted, sleep $sleep_time"
-        jobCompleted=$(kubectl get benchmark "$benchmark" -n "$benchmark_ns" -ojson | jq -r .status.jobCompleted)
-    done
-    echo "Benchmark job completed"
+    local benchmark="$1"
+    local benchmark_ns="$2"
+    local sleep_time="$3"
+    local expect_num=""
+    local jobCompleted=""
+
+    expect_num=$(expect_num "$benchmark" "$benchmark_ns")
+    jobCompleted=$(kubectl get benchmark "$benchmark" -n "$benchmark_ns" -ojson | jq -r .status.jobCompleted)
+    echo "Wait for $expect_num $benchmark jobs to be completed, sleep $sleep_time"
+
+    while [ "$jobCompleted" != "$expect_num/$expect_num" ]; do
+        sleep "$sleep_time"
+        echo "Wait for $benchmark to be completed... $jobCompleted, sleep $sleep_time"
+        jobCompleted=$(kubectl get benchmark "$benchmark" -n "$benchmark_ns" -ojson | jq -r .status.jobCompleted)
+    done
+    echo "Benchmark job completed"
 }

 save_benchmark() {
-    local benchmark="$1"
-    local benchmark_ns="$2"
-    shift 2
-    kubectl get benchmark "$benchmark" -n "$benchmark_ns" -ojson >"$DATAPATH/$benchmark.json"
+    local benchmark="$1"
+    local benchmark_ns="$2"
+    shift 2
+    kubectl get benchmark "$benchmark" -n "$benchmark_ns" -ojson >"$DATAPATH/$benchmark.json"
 }

 collect_idle() {
-    local args=(
-        "-o" "idle"
-        "--interval" "1000"
-    )
-    if ! "$NATIVE"; then
-        docker run --rm -v "$DATAPATH":/data --network=host "$ENTRYPOINT_IMG" query "${args[@]}"
-    else
-        python ../cmd/main.py query "${args[@]}" || true
-    fi
+    local args=(
+        "-o" "idle"
+        "--interval" "1000"
+    )
+    if ! "$NATIVE"; then
+        docker run --rm -v "$DATAPATH":/data --network=host --entrypoint kepler-model "$ENTRYPOINT_IMG" query "${args[@]}"
+    else
+        kepler-model query "${args[@]}" || true
+    fi
 }

 collect_data() {
-    local benchmark="$1"
-    local benchmark_ns="$2"
-    local sleep_time="$3"
-    shift 3
-    [[ "$benchmark" != "customBenchmark" ]] && {
-        kubectl apply -f "$CPE_BENCHMARK_DIR"/"$benchmark".yaml
-        wait_for_benchmark "$benchmark" "$benchmark_ns" "$sleep_time"
-        save_benchmark "$benchmark" "$benchmark_ns"
-        kubectl delete -f "$CPE_BENCHMARK_DIR"/"$benchmark".yaml
-    }
-    local args=(
-        "-i" "$benchmark"
-        "-o" "${benchmark}_kepler_query"
-        "-s" "$PROM_SERVER"
-    )
-    if ! "$NATIVE"; then
-        docker run --rm -v "$DATAPATH":/data --network=host "$ENTRYPOINT_IMG" query "${args[@]}" || true
-    else
-        python ../cmd/main.py query "${args[@]}" || true
-    fi
+    local benchmark="$1"
+    local benchmark_ns="$2"
+    local sleep_time="$3"
+    shift 3
+    [[ "$benchmark" != "customBenchmark" ]] && {
+        kubectl apply -f "$CPE_BENCHMARK_DIR"/"$benchmark".yaml
+        wait_for_benchmark "$benchmark" "$benchmark_ns" "$sleep_time"
+        save_benchmark "$benchmark" "$benchmark_ns"
+        kubectl delete -f "$CPE_BENCHMARK_DIR"/"$benchmark".yaml
+    }
+    local args=(
+        "-i" "$benchmark"
+        "-o" "${benchmark}_kepler_query"
+        "-s" "$PROM_SERVER"
+    )
+    if ! "$NATIVE"; then
+        docker run --rm -v "$DATAPATH":/data --network=host --entrypoint kepler-model "$ENTRYPOINT_IMG" query "${args[@]}" || true
+    else
+        kepler-model query "${args[@]}" || true
+    fi
 }

 deploy_prom_dependency() {
-    kubectl apply -f "$DEPOYMENT_DIR"/prom-kepler-rbac.yaml
-    kubectl apply -f "$DEPOYMENT_DIR"/prom-np.yaml
+    kubectl apply -f "$DEPOYMENT_DIR"/prom-kepler-rbac.yaml
+    kubectl apply -f "$DEPOYMENT_DIR"/prom-np.yaml
 }

 train_model() {
-    local query_response="$1"
-    local pipeline_name="${PIPELINE_PREFIX}$2"
-    echo "input=$query_response"
-    echo "pipeline=$pipeline_name"
-    local args=(
-        "-i" "$query_response"
-        "-p" "$pipeline_name"
-        "--energy-source" "$ENERGY_SOURCE"
-    )
-    if ! "$NATIVE"; then
-        echo "Train with docker"
-        docker run --rm -v "$DATAPATH":/data "$ENTRYPOINT_IMG" train "${args[@]}" || true
-    else
-        echo "Train natively"
-        python ../cmd/main.py train "${args[@]}" || true
-    fi
+    local query_response="$1"
+    local pipeline_name="${PIPELINE_PREFIX}$2"
+    echo "input=$query_response"
+    echo "pipeline=$pipeline_name"
+    local args=(
+        "-i" "$query_response"
+        "-p" "$pipeline_name"
+        "--energy-source" "$ENERGY_SOURCE"
+    )
+    if ! "$NATIVE"; then
+        echo "Train with docker"
+        docker run --rm -v "$DATAPATH":/data --entrypoint kepler-model "$ENTRYPOINT_IMG" train "${args[@]}" || true
+    else
+        echo "Train natively"
+        kepler-model train "${args[@]}" || true
+    fi
 }

 prepare_cluster() {
-    cluster_up
-    deploy_kepler
-    deploy_prom_dependency
-    watch_service 9090 "monitoring" prometheus-k8s &
-    reload_prometheus
+    cluster_up
+    deploy_kepler
+    deploy_prom_dependency
+    watch_service 9090 "monitoring" prometheus-k8s &
+    reload_prometheus
 }

 watch_service() {
-    local port="$1"
-    local ns="$2"
-    local svn="$3"
-    shift 3
-    kubectl port-forward --address localhost -n "$ns" service/"$svn" "$port":"$port"
+    local port="$1"
+    local ns="$2"
+    local svn="$3"
+    shift 3
+    kubectl port-forward --address localhost -n "$ns" service/"$svn" "$port":"$port"
 }

 collect() {
-    collect_idle
-    collect_data stressng cpe-operator-system 60
+    collect_idle
+    collect_data stressng cpe-operator-system 60
 }

 custom_collect() {
-    collect_data customBenchmark
+    collect_data customBenchmark
 }

 quick_collect() {
-    collect_data sample cpe-operator-system 10
+    collect_data sample cpe-operator-system 10
 }

 train() {
-    train_model stressng_kepler_query "${VERSION}_stressng"
+    train_model stressng_kepler_query "${VERSION}_stressng"
 }

 quick_train() {
-    train_model sample_kepler_query "${VERSION}_sample"
+    train_model sample_kepler_query "${VERSION}_sample"
 }

 custom_train() {
-    train_model customBenchmark_kepler_query "${VERSION}_customBenchmark"
+    train_model customBenchmark_kepler_query "${VERSION}_customBenchmark"
 }

 validate() {
-    local benchmark="$1"
-    local args=(
-        "-i" "${benchmark}_kepler_query"
-        "--benchmark" "$benchmark"
-    )
-    if ! "$NATIVE"; then
-        docker run --rm -v "$DATAPATH":/data "$ENTRYPOINT_IMG" validate "${args[@]}"
-    else
-        python ../cmd/main.py validate "${args[@]}" || true
-    fi
+    local benchmark="$1"
+    local args=(
+        "-i" "${benchmark}_kepler_query"
+        "--benchmark" "$benchmark"
+    )
+    if ! "$NATIVE"; then
+        docker run --rm -v "$DATAPATH":/data --entrypoint kepler-model "$ENTRYPOINT_IMG" validate "${args[@]}"
+    else
+        kepler-model validate "${args[@]}" || true
+    fi
 }

 _export() {
-    [[ $# -lt 4 ]] && {
-        echo "need arguements: [machine_id] [path_to_models] [publisher] [benchmark_name]"
-        return 1
-    }
-    local id="$1"
-    local output="$2"
-    local publisher="$3"
-    local main_collect_input="$4"
-    local include_raw="$5"
-    shift 5
-
-    local pipeline_name="${PIPELINE_PREFIX}${VERSION}_${main_collect_input}"
-    local validate_input="${main_collect_input}_kepler_query"
-
-    local args=(
-        "--id" "$id"
-        "-p" "$pipeline_name"
-        "-i" "$validate_input"
-        "--benchmark" "$main_collect_input"
-        "--version" "$VERSION"
-        "--publisher" "$publisher"
-        "$include_raw"
-    )
-    echo "${args[@]}"
-    if ! "$NATIVE"; then
-        docker run --rm -v "$DATAPATH":/data -v "$output":/models "$ENTRYPOINT_IMG" export "${args[@]}" -o /models
-    else
-        python ../cmd/main.py export "${args[@]}" -o "$output"
-    fi
-    return 0
+    [[ $# -lt 4 ]] && {
+        echo "need arguments: [machine_id] [path_to_models] [publisher] [benchmark_name]"
+        return 1
+    }
+    local id="$1"
+    local output="$2"
+    local publisher="$3"
+    local main_collect_input="$4"
+    local include_raw="$5"
+    shift 5
+
+    local pipeline_name="${PIPELINE_PREFIX}${VERSION}_${main_collect_input}"
+    local validate_input="${main_collect_input}_kepler_query"
+
+    local args=(
+        "--id" "$id"
+        "-p" "$pipeline_name"
+        "-i" "$validate_input"
+        "--benchmark" "$main_collect_input"
+        "--version" "$VERSION"
+        "--publisher" "$publisher"
+        "$include_raw"
+    )
+    echo "${args[@]}"
+    if ! "$NATIVE"; then
+        docker run --rm -v "$DATAPATH":/data -v "$output":/models --entrypoint kepler-model "$ENTRYPOINT_IMG" export "${args[@]}" -o /models
+    else
+        kepler-model export "${args[@]}" -o "$output"
+    fi
+    return 0
 }

 export_models() {
-    local id="$1"
-    local path_to_models="$2"
-    local publisher="$3"
-    local benchmark_name="$4"
-    shift 4
-    _export "$id" "$path_to_models" "$publisher" "$benchmark_name" || return 1
-    return 0
+    local id="$1"
+    local path_to_models="$2"
+    local publisher="$3"
+    local benchmark_name="$4"
+    shift 4
+    _export "$id" "$path_to_models" "$publisher" "$benchmark_name" || return 1
+    return 0
 }

 export_models_with_raw() {
-    local id="$1"
-    local path_to_models="$2"
-    local publisher="$3"
-    local benchmark_name="$4"
-    shift 4
-    _export "$id" "$path_to_models" "$publisher" "$benchmark_name" "--include-raw true" || return 1
-    return 0
+    local id="$1"
+    local path_to_models="$2"
+    local publisher="$3"
+    local benchmark_name="$4"
+    shift 4
+    _export "$id" "$path_to_models" "$publisher" "$benchmark_name" "--include-raw true" || return 1
+    return 0
 }

 cleanup() {
-    clean_cpe_cr
-    clean_deployment || true
-    cluster_down
+    clean_cpe_cr
+    clean_deployment || true
+    cluster_down
 }

 main() {
-    local op="$1"
-    shift 1
-    mkdir -p "$DATAPATH"
-    export MODEL_PATH=$DATAPATH
-    export DATAPATH
-    export PROMETHEUS_ENABLE
-    export TEKTON_ENABLE
-    export KIND_CLUSTER_NAME
-    $op "$@"
+    local op="$1"
+    shift 1
+    mkdir -p "$DATAPATH"
+    export MODEL_PATH=$DATAPATH
+    export DATAPATH
+    export PROMETHEUS_ENABLE
+    export TEKTON_ENABLE
+    export KIND_CLUSTER_NAME
+    $op "$@"
 }

diff --git a/model_training/tekton/pipelines/collect.yaml b/model_training/tekton/pipelines/collect.yaml
index 52445785..93a3c026 100644
--- a/model_training/tekton/pipelines/collect.yaml
+++ b/model_training/tekton/pipelines/collect.yaml
@@ -88,7 +88,6 @@ spec:
 - name: collect-idle
 image: $(params.MODEL_SERVER_IMAGE)
 args:
- - cmd/main.py
- query
- --data-path=$(workspaces.mnt.path)/data - --interval=$(params.IDLE_COLLECT_INTERVAL) @@ -96,7 +95,7 @@ spec: - --benchmark=idle - -o=idle - --id=$(params.MACHINE_ID) - command: [python3.10] + command: [kepler-model] env: - name: PROM_SERVER value: http://prometheus-k8s.monitoring.svc:9090 @@ -142,7 +141,6 @@ spec: - name: collect-stressng image: $(params.MODEL_SERVER_IMAGE) args: - - cmd/main.py - query - --data-path=$(workspaces.mnt.path)/data - --start-time=$(tasks.presteps.results.stress-start-time) @@ -151,7 +149,7 @@ spec: - --id=$(params.MACHINE_ID) - --benchmark=stressng - -o=kepler_query - command: [python3.10] + command: [kepler-model] env: - name: PROM_SERVER value: http://prometheus-k8s.monitoring.svc:9090 diff --git a/model_training/tekton/pipelines/complete-train.yaml b/model_training/tekton/pipelines/complete-train.yaml index 55545040..e22efa7a 100644 --- a/model_training/tekton/pipelines/complete-train.yaml +++ b/model_training/tekton/pipelines/complete-train.yaml @@ -104,7 +104,6 @@ spec: - name: collect-idle image: $(params.MODEL_SERVER_IMAGE) args: - - cmd/main.py - query - --data-path=$(workspaces.mnt.path)/data - --interval=$(params.IDLE_COLLECT_INTERVAL) @@ -112,7 +111,7 @@ spec: - --benchmark=idle - -o=idle - --id=$(params.MACHINE_ID) - command: [python3.10] + command: [kepler-model] env: - name: PROM_SERVER value: http://prometheus-k8s.monitoring.svc:9090 @@ -158,7 +157,6 @@ spec: - name: collect-stressng image: $(params.MODEL_SERVER_IMAGE) args: - - cmd/main.py - query - --data-path=$(workspaces.mnt.path)/data - --start-time=$(tasks.presteps.results.stress-start-time) @@ -167,7 +165,7 @@ spec: - --benchmark=stressng - -o=kepler_query - --id=$(params.MACHINE_ID) - command: [python3.10] + command: [kepler-model] env: - name: PROM_SERVER value: http://prometheus-k8s.monitoring.svc:9090 diff --git a/model_training/tekton/pipelines/single-train.yaml b/model_training/tekton/pipelines/single-train.yaml index d805408b..9c916f45 100644 --- a/model_training/tekton/pipelines/single-train.yaml +++ b/model_training/tekton/pipelines/single-train.yaml @@ -112,7 +112,6 @@ spec: - name: collect-idle image: $(params.MODEL_SERVER_IMAGE) args: - - cmd/main.py - query - --data-path=$(workspaces.mnt.path)/data - --interval=$(params.IDLE_COLLECT_INTERVAL) @@ -120,7 +119,7 @@ spec: - --benchmark=idle - -o=idle - --id=$(params.MACHINE_ID) - command: [python3.10] + command: [kepler-model] env: - name: PROM_SERVER value: http://prometheus-k8s.monitoring.svc:9090 @@ -166,7 +165,6 @@ spec: - name: collect-stressng image: $(params.MODEL_SERVER_IMAGE) args: - - cmd/main.py - query - --data-path=$(workspaces.mnt.path)/data - --start-time=$(tasks.presteps.results.stress-start-time) @@ -175,7 +173,7 @@ spec: - --benchmark=stressng - -o=kepler_query - --id=$(params.MACHINE_ID) - command: [python3.10] + command: [kepler-model] env: - name: PROM_SERVER value: http://prometheus-k8s.monitoring.svc:9090 diff --git a/model_training/tekton/tasks/extract-task.yaml b/model_training/tekton/tasks/extract-task.yaml index 3864d6db..b29fdd25 100644 --- a/model_training/tekton/tasks/extract-task.yaml +++ b/model_training/tekton/tasks/extract-task.yaml @@ -37,9 +37,8 @@ spec: steps: - name: extract image: $(params.MODEL_SERVER_IMAGE) - command: [python3.10] + command: [kepler-model] args: - - cmd/main.py - extract - --data-path=$(workspaces.mnt.path)/data - --input=kepler_query diff --git a/model_training/tekton/tasks/isolate-task.yaml b/model_training/tekton/tasks/isolate-task.yaml index 629f5092..1aaad402 100644 
--- a/model_training/tekton/tasks/isolate-task.yaml +++ b/model_training/tekton/tasks/isolate-task.yaml @@ -47,9 +47,8 @@ spec: steps: - name: isolate image: $(params.MODEL_SERVER_IMAGE) - command: [python3.10] + command: [kepler-model] args: - - cmd/main.py - isolate - --data-path=$(workspaces.mnt.path)/data - --input=kepler_query diff --git a/model_training/tekton/tasks/original-pipeline-task.yaml b/model_training/tekton/tasks/original-pipeline-task.yaml index 5d99a45b..1ad7b5d3 100644 --- a/model_training/tekton/tasks/original-pipeline-task.yaml +++ b/model_training/tekton/tasks/original-pipeline-task.yaml @@ -49,12 +49,11 @@ spec: steps: - name: pipeline-train image: $(params.MODEL_SERVER_IMAGE) - command: [python3.10] + command: [kepler-model] env: - name: MODEL_PATH value: $(workspaces.mnt.path)/models args: - - cmd/main.py - train - --data-path=$(workspaces.mnt.path)/data - --input=kepler_query diff --git a/model_training/tekton/tasks/train-task.yaml b/model_training/tekton/tasks/train-task.yaml index 7835fe34..fb904341 100644 --- a/model_training/tekton/tasks/train-task.yaml +++ b/model_training/tekton/tasks/train-task.yaml @@ -43,12 +43,11 @@ spec: steps: - name: train-from-data image: $(params.MODEL_SERVER_IMAGE) - command: [python3.10] + command: [kepler-model] env: - name: MODEL_PATH value: $(workspaces.mnt.path)/models args: - - cmd/main.py - train_from_data - --data-path=$(workspaces.mnt.path)/data - --input=$(params.INPUT_DATA) diff --git a/src/kepler_model/train/ec2_pipeline.py b/src/kepler_model/train/ec2_pipeline.py index d043f277..595d55db 100644 --- a/src/kepler_model/train/ec2_pipeline.py +++ b/src/kepler_model/train/ec2_pipeline.py @@ -8,11 +8,11 @@ after run: example of plot command: -DATAPATH=/path/to/models python cmd/main.py plot --target-data estimate --input /path/to/data/i3.metal/kepler_query.json --pipeline-name ec2 --energy-source rapl-sysfs --model-name LinearRegressionTrainer_4 --output-type AbsPower --output i3metal-ec2 --feature-group BPFOnly -DATAPATH=/path/to/models python cmd/main.py plot --target-data estimate --input /path/to/data/i3.metal/kepler_query.json --pipeline-name ec2 --energy-source rapl-sysfs --model-name LogarithmicRegressionTrainer_4 --output-type AbsPower --output i3metal-ec2 --feature-group BPFOnly +DATAPATH=/path/to/models kepler-model plot --target-data estimate --input /path/to/data/i3.metal/kepler_query.json --pipeline-name ec2 --energy-source rapl-sysfs --model-name LinearRegressionTrainer_4 --output-type AbsPower --output i3metal-ec2 --feature-group BPFOnly +DATAPATH=/path/to/models kepler-model plot --target-data estimate --input /path/to/data/i3.metal/kepler_query.json --pipeline-name ec2 --energy-source rapl-sysfs --model-name LogarithmicRegressionTrainer_4 --output-type AbsPower --output i3metal-ec2 --feature-group BPFOnly example of export command: -DATAPATH=/path/to/models python cmd/main.py export --pipeline-name ec2-0.7.11 -o /path/to/kepler-model-db/models --publisher sunya-ch --zip=true --collect-date "July 2024" +DATAPATH=/path/to/models kepler-model export --pipeline-name ec2-0.7.11 -o /path/to/kepler-model-db/models --publisher sunya-ch --zip=true --collect-date "July 2024" """ import json
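
For a quick local check of the renamed entrypoint, the commands below mirror
the arguments already used by script.sh and the Tekton tasks in this patch.
This is a minimal sketch, assuming the package is installed (e.g. `pip install .`
in the hatch-managed checkout) so that the `kepler-model` console script is on
PATH, and that Prometheus is port-forwarded to localhost:9090 as done by
`prepare_cluster`; the data path, pipeline name, and energy source below are
illustrative, not prescribed by this patch.

    # Illustrative paths; query/train read and write under these directories.
    export DATAPATH=/tmp/kepler-data
    export MODEL_PATH=$DATAPATH    # as exported by main() in script.sh

    # Collect metrics for a finished benchmark (mirrors collect_data in script.sh).
    kepler-model query -i sample -o sample_kepler_query -s http://localhost:9090

    # Train models from the collected query response (mirrors train_model).
    kepler-model train -i sample_kepler_query -p std_v0.7_sample --energy-source rapl-sysfs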