diff --git a/.gitignore b/.gitignore index e7e809b..f5f52e1 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,8 @@ data/robomaker data/minio/bucket/current +data/minio/bucket/rl-deepracer-pretrained +data/minio/bucket/DeepRacer-Metrics data/minio/.minio.sys .idea -**/.idea \ No newline at end of file +**/.idea +__pycache__ diff --git a/README.md b/README.md index 3f44e45..953b1ba 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,9 @@ Very rough guide for use (details to come): - run `./start-training.sh` to start training - view docker logs to see if it's working (automatic if `tmux` is installed) - run `./stop-training.sh` to stop training. -- run `./delete_last_run.sh` to clear out the buckets for a fresh run. +- run `./delete_last_run.sh` to clear out the buckets for a fresh run. For a convenient version without a sudo prompt, check out `utilities/delete-last.c`. +- run `./local-copy.sh ` to back up the current model files into a user-specified MODEL directory. +- run `./mk-model.sh ` to create a .tar.gz file from your model that can be uploaded to the physical car. (Will be removed in a future update once the file is generated correctly after training.) The first run will likely take quite a while to start as it needs to pull over 10GB of all the docker images. You can avoid this delay by pulling the images in advance: @@ -40,13 +42,16 @@ You can avoid this delay by pulling the images in advance: - `docker pull mattcamp/dr-coach` - `docker pull minio/minio` +## Modifying parameters +Hyperparameters for training are loaded from `src/rl_coach_2020_v2/hyperparams.json`; a shortcut link to it has been created in the root directory. The available options are exactly the same, except for the new `pretrained` option, which simplifies enabling pretrained mode. + ## Video stream The video stream is available either via a web stream of via Kinesis. ### Web stream: -The web video stream is exposed on port 8888. 
If you're running a local browser then you should be able to browse directly to http://127.0.0.1:8888/stream_viewer?topic=/racecar/deepracer/kvs_stream once Robomaker has started. +The web video stream is exposed on port 8888. If you're running a local browser then you should be able to browse directly to `http://127.0.0.1:8888/stream_viewer?topic=/racecar/deepracer/kvs_stream` once Robomaker has started. ### Kinesis stream: @@ -62,6 +67,9 @@ Kinesis video is a stream of approx 1.5Mbps so beware the impact on your AWS cos Once working the stream should be visible in the Kinesis console. +### VNC +You can enter the running environment using a VNC viewer at localhost:8080. + ## Known issues: - Sometimes sagemaker won't start claiming that `/opt/ml/input/config/resourceconfig.json` is missing. Still trying to work out why. - Stopping training at the wrong time seems to cause a problem where sagemaker will crash next time when trying to load the 'best' model which may not exist properly. This only happens if you start a new training session without clearing out the bucket first. Yet to be seen if this will cause a problem when trying to use pretrained models. 
diff --git a/config.env b/config.env index e088867..7693806 100644 --- a/config.env +++ b/config.env @@ -1,14 +1,14 @@ ALTERNATE_DRIVING_DIRECTION=False APP_REGION=us-east-1 AWS_REGION=us-east-1 -AWS_ACCESS_KEY_ID=your_aws_access_key -AWS_SECRET_ACCESS_KEY=your_aws_secret_key +AWS_ACCESS_KEY_ID=minio +AWS_SECRET_ACCESS_KEY=miniokey CHANGE_START_POSITION=False GPU_AVAILABLE=True KINESIS_VIDEO_STREAM_NAME=dr-kvs-local LOCAL=True -MINIO_ACCESS_KEY=your_aws_access_key -MINIO_SECRET_KEY=your_aws_secret_key +MINIO_ACCESS_KEY=minio +MINIO_SECRET_KEY=miniokey MODEL_METADATA_FILE_S3_KEY=custom_files/model_metadata.json MODEL_S3_BUCKET=bucket MODEL_S3_PREFIX=current @@ -22,5 +22,5 @@ ENABLE_KINESIS=false ENABLE_GUI=true ENABLE_GPU_TRAINING=true ENABLE_LOCAL_DESKTOP=false -ENABLE_TMUX=true -MIN_EVAL_TRIALS=5 \ No newline at end of file +ENABLE_TMUX=false +MIN_EVAL_TRIALS=5 diff --git a/custom_files b/custom_files new file mode 120000 index 0000000..603fa21 --- /dev/null +++ b/custom_files @@ -0,0 +1 @@ +data/minio/bucket/custom_files \ No newline at end of file diff --git a/data/minio/bucket/custom_files/model_metadata.json b/data/minio/bucket/custom_files/model_metadata.json index b7520b4..6cfb1b0 100644 --- a/data/minio/bucket/custom_files/model_metadata.json +++ b/data/minio/bucket/custom_files/model_metadata.json @@ -7,27 +7,27 @@ }, { "steering_angle": -20, - "speed": 1.3333333333333333, + "speed": 1.2, "index": 1 }, { "steering_angle": -10, - "speed": 2, + "speed": 1.2, "index": 2 }, { "steering_angle": 0, - "speed": 2.5, + "speed": 1.2, "index": 3 }, { "steering_angle": 10, - "speed": 2, + "speed": 1.2, "index": 4 }, { "steering_angle": 20, - "speed": 1.3333333333333333, + "speed": 1.2, "index": 5 }, { @@ -37,8 +37,8 @@ } ], "sensor": [ - "STEREO_CAMERAS" + "FRONT_FACING_CAMERA" ], "neural_network": "DEEP_CONVOLUTIONAL_NETWORK_SHALLOW", "version": "2" -} \ No newline at end of file +} diff --git a/data/minio/bucket/custom_files/reward.py 
b/data/minio/bucket/custom_files/reward.py index 93adbe9..dad09bb 100644 --- a/data/minio/bucket/custom_files/reward.py +++ b/data/minio/bucket/custom_files/reward.py @@ -42,4 +42,4 @@ def reward_function(params): # the two aspects above reward += 1.0 * reward_lane + 4.0 * reward_avoid - return reward \ No newline at end of file + return reward diff --git a/data/minio/bucket/custom_files/training_params.yaml b/data/minio/bucket/custom_files/training_params.yaml index 870d1e6..a4725d8 100644 --- a/data/minio/bucket/custom_files/training_params.yaml +++ b/data/minio/bucket/custom_files/training_params.yaml @@ -20,6 +20,5 @@ METRIC_NAME: "TrainingRewardScore" CAR_COLOR: "Purple" TARGET_REWARD_SCORE: "None" NUMBER_OF_OBSTACLES: "3" -CHANGE_START_POSITION: "true" OBSTACLE_TYPE: "BOX" -RANDOMIZE_OBSTACLE_LOCATIONS: "false" \ No newline at end of file +RANDOMIZE_OBSTACLE_LOCATIONS: "false" diff --git a/docker-compose.yml b/docker-compose.yml index e071f7c..b308450 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -21,13 +21,13 @@ services: env_file: config.env container_name: coach volumes: - - '//var/run/docker.sock:/var/run/docker.sock' + - '/var/run/docker.sock:/var/run/docker.sock' - './src/rl_coach_2020_v2:/deepracer/rl_coach' - '/robo/container:/robo/container' depends_on: - minio robomaker: - image: awsdeepracercommunity/deepracer-robomaker:cpu + image: awsdeepracercommunity/deepracer-robomaker:cpu-avx2 command: ["${ROBOMAKER_COMMAND}"] volumes: - ./data/robomaker:/root/.ros/ diff --git a/hyperparams.json b/hyperparams.json new file mode 120000 index 0000000..def3daa --- /dev/null +++ b/hyperparams.json @@ -0,0 +1 @@ +src/rl_coach_2020_v2/hyperparams.json \ No newline at end of file diff --git a/local-copy.sh b/local-copy.sh new file mode 100755 index 0000000..a7a0f5b --- /dev/null +++ b/local-copy.sh @@ -0,0 +1,16 @@ +#!/bin/bash +# USAGE: ./local-copy.sh + +MODELS=../models + +echo "Backup to $MODELS/$1" +echo "..." 
+ +mkdir $MODELS/$1 + +cp data/robomaker/log/rl_coach_* $MODELS/$1/ +cp -R data/minio/bucket/current/model $MODELS/$1/ +cp data/minio/bucket/custom_files/reward.py $MODELS/$1/ + +echo "done" + diff --git a/mk-model.sh b/mk-model.sh new file mode 100755 index 0000000..15768d7 --- /dev/null +++ b/mk-model.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# create .tar.gz file uploadable to physical deepracer +# USAGE: ./mk-model.sh +cd $1 +echo $(pwd) + +if [ "$1" = "" ]; then + echo "USAGE: $0 " +else + + NUM=`cut -d '_' -f 1 < model/.coach_checkpoint` + + mkdir -p output/agent + + cp "model/model_$NUM.pb" output/agent/model.pb + cp model/model_metadata.json output/ + + cd output + tar -czvf ../output.tar.gz * + + echo "done" + +fi + diff --git a/src/rl_coach_2020_v2/hyperparams.json b/src/rl_coach_2020_v2/hyperparams.json new file mode 100644 index 0000000..0acadab --- /dev/null +++ b/src/rl_coach_2020_v2/hyperparams.json @@ -0,0 +1,17 @@ +{ + "batch_size": 64, + "beta_entropy": 0.01, + "discount_factor": 0.999, + "e_greedy_value": 0.05, + "epsilon_steps": 10000, + "exploration_type": "categorical", + "loss_type": "mean squared error", + "lr": 0.0003, + "num_episodes_between_training": 20, + "num_epochs": 10, + "stack_size": 1, + "term_cond_avg_score": 100000.0, + "term_cond_max_episodes": 10000, + "pretrained": "false" +} + diff --git a/src/rl_coach_2020_v2/rl_deepracer_coach_robomaker.py b/src/rl_coach_2020_v2/rl_deepracer_coach_robomaker.py index 15d31c6..5a41304 100644 --- a/src/rl_coach_2020_v2/rl_deepracer_coach_robomaker.py +++ b/src/rl_coach_2020_v2/rl_deepracer_coach_robomaker.py @@ -55,7 +55,6 @@ s3_location = "s3://%s/%s" % (s3_bucket, s3_prefix) print("Uploading to " + s3_location) - metric_definitions = [ # Training> Name=main_level/agent, Worker=0, Episode=19, Total reward=-102.88, Steps=19019, Training iteration=1 {'Name': 'reward-training', @@ -96,6 +95,38 @@ instance_type = "local_gpu" image_name = "awsdeepracercommunity/deepracer-sagemaker:gpu" +# Hyperparams +## 
Here we load hyperparameters from hyperparams.json file +with open('hyperparams.json', 'r', encoding='utf-8') as hp: + hyper = eval(hp.read()) +# Create dictionary that will be passed to estimator +# TODO: code can be simplified if we iterate over an array of keys to init dict +hyperparameters = {"s3_bucket": s3_bucket, + "s3_prefix": s3_prefix, + "aws_region": aws_region, + "model_metadata_s3_key": "s3://{}/custom_files/model_metadata.json".format(s3_bucket), + "RLCOACH_PRESET": RLCOACH_PRESET, + "batch_size": hyper["batch_size"], + "beta_entropy": hyper["beta_entropy"], + "discount_factor": hyper["discount_factor"], + "e_greedy_value": hyper["e_greedy_value"], + "epsilon_steps": hyper["epsilon_steps"], + "exploration_type": hyper["exploration_type"], + "loss_type": hyper["loss_type"], + "lr": hyper["lr"], + "num_episodes_between_training": hyper["num_episodes_between_training"], + "num_epochs": hyper["num_epochs"], + "stack_size": hyper["stack_size"], + "term_cond_avg_score": hyper["term_cond_avg_score"], + "term_cond_max_episodes": hyper["term_cond_max_episodes"] + } +# Enable pretrained if setting existed +if hyper["pretrained"].lower() == "true": + hyperparameters.update({ + "pretrained_s3_bucket": "{}".format(s3_bucket), + "pretrained_s3_prefix": "rl-deepracer-pretrained" + }) + estimator = RLEstimator(entry_point="training_worker.py", source_dir='src', dependencies=["common/sagemaker_rl"], @@ -111,29 +142,7 @@ base_job_name=job_name, image_name=image_name, train_max_run=job_duration_in_seconds, # Maximum runtime in seconds - hyperparameters={"s3_bucket": s3_bucket, - "s3_prefix": s3_prefix, - "aws_region": aws_region, - "model_metadata_s3_key": "s3://{}/custom_files/model_metadata.json".format(s3_bucket), - "RLCOACH_PRESET": RLCOACH_PRESET, - - "batch_size": 64, - "beta_entropy": 0.01, - "discount_factor": 0.999, - "e_greedy_value": 0.05, - "epsilon_steps": 10000, - "exploration_type": "categorical", - "loss_type": "mean squared error", - "lr": 0.0003, - 
"num_episodes_between_training": 20, - "num_epochs": 10, - "stack_size": 1, - "term_cond_avg_score": 100000.0, - "term_cond_max_episodes": 100000 - - #"pretrained_s3_bucket": "{}".format(s3_bucket), - #"pretrained_s3_prefix": "rl-deepracer-pretrained" - }, + hyperparameters=hyperparameters, metric_definitions = metric_definitions, s3_client=s3Client #subnets=default_subnets, # Required for VPC mode diff --git a/start-training.sh b/start-training.sh index 6553daa..a09a91a 100755 --- a/start-training.sh +++ b/start-training.sh @@ -13,15 +13,15 @@ export CURRENT_UID=$(id -u):$(id -g) docker-compose -f ./docker-compose.yml up -d if [ "$ENABLE_LOCAL_DESKTOP" = true ] ; then - echo "Starting desktop mode... waiting 20s for Sagemaker container to start" - sleep 20 + echo "Starting desktop mode... waiting 30s for Sagemaker container to start" + sleep 30 echo 'Attempting to pull up sagemaker logs...' SAGEMAKER_ID="$(docker ps | awk ' /sagemaker/ { print $1 }')" echo 'Attempting to open stream viewer and logs...' - gnome-terminal -x sh -c "echo viewer;x-www-browser -new-window http://localhost:8888/stream_viewer?topic=/racecar/deepracer/kvs_stream;sleep 1;wmctrl -r kvs_stream -b remove,maximized_vert,maximized_horz;sleep 1;wmctrl -r kvs_stream -e 1,100,100,720,640" - gnome-terminal -x sh -c "docker logs -f $SAGEMAKER_ID" + gnome-terminal --tab -- sh -c "echo viewer;x-www-browser -new-window http://localhost:8888/stream_viewer?topic=/racecar/deepracer/kvs_stream;sleep 1;wmctrl -r kvs_stream -b remove,maximized_vert,maximized_horz;sleep 1;wmctrl -r kvs_stream -e 1,100,100,720,640" + gnome-terminal --tab -- sh -c "docker logs -f $SAGEMAKER_ID" else echo "Started in headless server mode. Set ENABLE_LOCAL_DESKTOP to true in config.env for desktop mode." 
if [ "$ENABLE_TMUX" = true ] ; then diff --git a/tail-sagemaker-logs.sh b/tail-sagemaker-logs.sh index 419cabe..5dce7d5 100755 --- a/tail-sagemaker-logs.sh +++ b/tail-sagemaker-logs.sh @@ -7,4 +7,4 @@ do sleep 1 done -docker logs -f $SAGEMAKER_ID +docker logs --follow $SAGEMAKER_ID diff --git a/utilities/.gitignore b/utilities/.gitignore new file mode 100644 index 0000000..37649b2 --- /dev/null +++ b/utilities/.gitignore @@ -0,0 +1,3 @@ +* +!delete-last.c +!.gitignore diff --git a/utilities/delete-last.c b/utilities/delete-last.c new file mode 100644 index 0000000..d435e09 --- /dev/null +++ b/utilities/delete-last.c @@ -0,0 +1,105 @@ +/* + * Clean deepracer directory recursively. + * This utility was made to avoid entering sudo password every time + * you want to clean your data. + * PLEASE MAKE SURE TO UPDATE THE PATH BELOW IN main + * + * To build: gcc -o delete-last delete-last.c + * To run: + * sudo chown root: delete-last + * sudo chmod 4750 delete-last + * ./delete-last + */ +#include +#include +#include +#include +#include +#include +#include + +int filter(const struct dirent *entry, const unsigned char type) { + if (entry->d_type == type) + return strcmp(entry->d_name, ".") && strcmp(entry->d_name, ".."); + else + return 0; +} + +int filtdir(const struct dirent *entry) { + return filter(entry, DT_DIR); +} + +int filtreg(const struct dirent *entry) { + return filter(entry, DT_REG) || filter(entry, DT_LNK); +} + +void deletedir(const char* path) { + char full[512]; + int ret; + struct dirent **dirlist, **filelist; + int d, f; + d = scandir(path, &dirlist, filtdir, NULL); + f = scandir(path, &filelist, filtreg, NULL); + + printf("Deleting %s\n", path); + + if (!strcmp(path, "/")) { + printf("Error: remove filesystem root attempt!\n"); + exit(EXIT_FAILURE); + } + if (d < 0 || f < 0) { + perror("Failed to read directory"); + exit(EXIT_FAILURE); + } + + while (d--) { + strcpy(full, path); + strcat(full, "/"); + strcat(full, dirlist[d]->d_name); + deletedir(full); 
+ ret = remove(full); + if (ret < 0) + perror("Failed to remove"); + free(dirlist[d]); + } + while (f--) { + strcpy(full, path); + strcat(full, "/"); + strcat(full, filelist[f]->d_name); + ret = remove(full); + if (ret < 0) + perror("Failed to remove"); + free(filelist[f]); + } + + free(dirlist); + free(filelist); +} + +int main() { + // path should be hardcoded + // we do not want root privileged program deleting all our stuff + const char* path = "//deepracer-local/"; + char* sub[2]; + char full[512]; + int ret; + // set subpaths + sub[0] = "data/robomaker"; + sub[1] = "data/minio/bucket/current"; + + // set UID to root + ret = setuid(0); + if (ret < 0) { + perror("Failed to grant root privileges"); + exit(EXIT_FAILURE); + } + + for (int i = 0; i < 2; i++) { + strcpy(full, path); + strcat(full, sub[i]); + deletedir(full); + } + + return ret; +} +