From bc042473234598492269e443ef4391b1d3838453 Mon Sep 17 00:00:00 2001 From: Eljas Hyyrynen Date: Sat, 14 Jan 2023 15:10:38 +0200 Subject: [PATCH 01/12] update .gitignore --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 4ce3357..9c5fd58 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,5 @@ .idea/ cmake-build-*/ +*.code-workspace +*.pk3 +src/gitinfo.h From c1a1fe4a444d3f403e8c9bc866e8dd94bdf01245 Mon Sep 17 00:00:00 2001 From: Eljas Hyyrynen Date: Sat, 14 Jan 2023 16:16:47 +0200 Subject: [PATCH 02/12] add encodings and heatmap-based rewards to sequence storage --- include/App.hpp | 4 ++- src/App.cpp | 74 +++++++++++++++++++++++++++++++++++++++++----- src/ModelProto.cpp | 3 +- 3 files changed, 71 insertions(+), 10 deletions(-) diff --git a/include/App.hpp b/include/App.hpp index c0f2e3f..f351fd9 100644 --- a/include/App.hpp +++ b/include/App.hpp @@ -55,6 +55,8 @@ class App { ModelProto _model; - + torch::Device _torchDevice; + FrameEncoder _frameEncoder; + void nextMap(); // proceed to next map }; diff --git a/src/App.cpp b/src/App.cpp index da8e14f..8871cee 100644 --- a/src/App.cpp +++ b/src/App.cpp @@ -9,15 +9,25 @@ // #include "App.hpp" - +#include "Constants.hpp" +#include #include "gvizdoom/DoomGame.hpp" - #include #include "Constants.hpp" +<<<<<<< HEAD using namespace doot2; using namespace gvizdoom; +======= + +using namespace doot2; +using namespace gvizdoom; +using namespace torch; +using namespace torch::indexing; +namespace fs = std::filesystem; + +>>>>>>> add encodings and heatmap-based rewards to sequence storage App::App() : _rnd (1507715517), @@ -27,12 +37,13 @@ App::App() : _quit (false), _heatmapActionModule (HeatmapActionModule::Settings{256, 32.0f}), _doorTraversalActionModule (false), - _sequenceStorage (SequenceStorage::Settings{batchSize, sequenceLength, true, false, frameWidth, frameHeight, ImageFormat::BGRA}), + _sequenceStorage (SequenceStorage::Settings{batchSize, sequenceLength, false, true, 0, 0, ImageFormat::BGRA, encodingLength}), _positionPlot (1024, 1024, CV_32FC3, cv::Scalar(0.0f)), _initPlayerPos (0.0f, 0.0f), _frameId (0), _batchEntryId (0), - _newPatchReady (false) + _newPatchReady (false), + _torchDevice (torch::cuda::is_available() ? kCUDA : kCPU) { auto& doomGame = DoomGame::instance(); @@ -71,6 +82,21 @@ App::App() : // Setup ActionManager _actionManager.addModule(&_doorTraversalActionModule); _actionManager.addModule(&_heatmapActionModule); + + // Load frame encoder + if (fs::exists(frameEncoderFilename)) { + printf("Loading frame encoder model from %s\n", frameEncoderFilename); // TODO logging + serialize::InputArchive inputArchive; + inputArchive.load_from(frameEncoderFilename); + _frameEncoder->load(inputArchive); + // Use the inference mode + _frameEncoder->eval(); + } + else { + printf("No %s found. 
Initializing a new frame encoder model.\n", frameEncoderFilename); // TODO logging + } + + _frameEncoder->to(_torchDevice); } App::~App() @@ -94,6 +120,9 @@ void App::loop() size_t recordBeginFrameId = 768+_rnd()%512; size_t recordEndFrameId = recordBeginFrameId+64; + // BHWC + torch::Tensor pixelBuffer{torch::zeros({1, 480, 640, 4})}; + while (!_quit) { while(SDL_PollEvent(&event)) { if (event.type == SDL_QUIT || @@ -136,10 +165,39 @@ void App::loop() auto recordFrameId = _frameId - recordBeginFrameId; auto batch = _sequenceStorage[recordFrameId]; batch.actions[_batchEntryId] = action; - Image frame(doomGame.getScreenWidth(), doomGame.getScreenHeight(), ImageFormat::BGRA); - frame.copyFrom(doomGame.getPixelsBGRA()); - convertImage(frame, batch.frames[_batchEntryId]); - batch.rewards[_batchEntryId] = 0.0; // TODO no rewards for now + + // Convert the game frame from uint8 to float + const auto imageFormat{ImageFormat::BGRA}; + Image frameUint8(doomGame.getScreenWidth(), doomGame.getScreenHeight(), imageFormat); + Image frameFloat(doomGame.getScreenWidth(), doomGame.getScreenHeight(), imageFormat); + frameUint8.copyFrom(doomGame.getPixelsBGRA()); + convertImage(frameUint8, frameFloat); + + // Copy the float frame to a torch::Tensor + const auto nPixels = doomGame.getScreenWidth() * doomGame.getScreenHeight() * getImageFormatNChannels(imageFormat); + copyToTensor(frameFloat.data(), nPixels, pixelBuffer); + + // upload to GPU and permute to BCWH + torch::Tensor pixelBufferGpu = pixelBuffer.to(_torchDevice); + pixelBufferGpu = pixelBufferGpu.permute({0,3,1,2}); + + const auto tempsz = pixelBufferGpu.sizes(); + printf("Tensor: %d %d %d %d\n", tempsz[0], tempsz[1], tempsz[2], tempsz[3]); + + // encode + torch::Tensor encoding = _frameEncoder(pixelBufferGpu); + + // TODO: check encoding sanity with decoder + + // store encoding to the sequence storage + copyFromTensor(encoding.to(torch::kCPU), batch.encodings[_batchEntryId], encodingLength); + + // Update relative player position + playerPosRelative(0) = doomGame.getGameState()(0) - _initPlayerPos(0); + playerPosRelative(1) = _initPlayerPos(1) - doomGame.getGameState()(1); // invert y + + batch.rewards[_batchEntryId] = _heatmapActionModule.sample(playerPosRelative, true); + printf("reward: %.5f\n", batch.rewards[_batchEntryId]); } // Render screen diff --git a/src/ModelProto.cpp b/src/ModelProto.cpp index 9b8a82e..29ba505 100644 --- a/src/ModelProto.cpp +++ b/src/ModelProto.cpp @@ -9,6 +9,8 @@ // #include "ModelProto.hpp" + +#include "Constants.hpp" #include "SequenceStorage.hpp" #include // TODO temp @@ -23,7 +25,6 @@ static constexpr double learningRate = 1.0e-3; // TODO static constexpr int64_t nTrainingEpochs = 10; using namespace doot2; - using namespace torch; namespace tf = torch::nn::functional; namespace fs = std::filesystem; From e03b2c1a88f003991e643cdd80e1cd49a19c42af Mon Sep 17 00:00:00 2001 From: Eljas Hyyrynen Date: Sat, 14 Jan 2023 17:30:44 +0200 Subject: [PATCH 03/12] SequenceStorage: fix invalid actions length bug if frames are not used but encodings are used, actions length is zero --- src/SequenceStorage.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/SequenceStorage.cpp b/src/SequenceStorage.cpp index a460b0e..e46e5fd 100644 --- a/src/SequenceStorage.cpp +++ b/src/SequenceStorage.cpp @@ -77,7 +77,7 @@ SequenceStorage::ConstBatchHandle::ConstBatchHandle( SequenceStorage::SequenceStorage(const Settings& settings) : _settings (settings), _frameSize 
(_settings.frameWidth*_settings.frameHeight*getImageFormatNChannels(_settings.frameFormat)), - _actions (_settings.hasFrames ? _settings.length*_settings.batchSize : 0), + _actions (_settings.length*_settings.batchSize), _frameData (_settings.hasFrames ? _settings.length*_settings.batchSize*_frameSize : 0), _encodings (_settings.hasEncodings ? _settings.length*_settings.batchSize : 0), _encodingData (_settings.hasEncodings ? _settings.length*_settings.batchSize*_settings.encodingLength : 0), From 41787f52830b953684101ba7c59e3ba0ef4a18df Mon Sep 17 00:00:00 2001 From: Eljas Hyyrynen Date: Sat, 14 Jan 2023 17:31:28 +0200 Subject: [PATCH 04/12] App: set reward as negative heatmap value --- src/App.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/App.cpp b/src/App.cpp index 8871cee..868368b 100644 --- a/src/App.cpp +++ b/src/App.cpp @@ -177,12 +177,11 @@ void App::loop() const auto nPixels = doomGame.getScreenWidth() * doomGame.getScreenHeight() * getImageFormatNChannels(imageFormat); copyToTensor(frameFloat.data(), nPixels, pixelBuffer); - // upload to GPU and permute to BCWH + // upload to GPU and permute to BCHW torch::Tensor pixelBufferGpu = pixelBuffer.to(_torchDevice); pixelBufferGpu = pixelBufferGpu.permute({0,3,1,2}); const auto tempsz = pixelBufferGpu.sizes(); - printf("Tensor: %d %d %d %d\n", tempsz[0], tempsz[1], tempsz[2], tempsz[3]); // encode torch::Tensor encoding = _frameEncoder(pixelBufferGpu); @@ -196,8 +195,8 @@ void App::loop() playerPosRelative(0) = doomGame.getGameState()(0) - _initPlayerPos(0); playerPosRelative(1) = _initPlayerPos(1) - doomGame.getGameState()(1); // invert y - batch.rewards[_batchEntryId] = _heatmapActionModule.sample(playerPosRelative, true); - printf("reward: %.5f\n", batch.rewards[_batchEntryId]); + // Reward is negative heatmap value + batch.rewards[_batchEntryId] = -_heatmapActionModule.sample(playerPosRelative, true); } // Render screen From 14eaf5f5d2e07bc3a0010cb0d576b1c605def5ba Mon Sep 17 00:00:00 2001 From: Eljas Hyyrynen Date: Sat, 14 Jan 2023 18:00:40 +0200 Subject: [PATCH 05/12] App show decoded encoding temporarily for debugging --- include/App.hpp | 2 ++ src/App.cpp | 35 +++++++++++++++++++++++++++++------ 2 files changed, 31 insertions(+), 6 deletions(-) diff --git a/include/App.hpp b/include/App.hpp index f351fd9..469cc9c 100644 --- a/include/App.hpp +++ b/include/App.hpp @@ -57,6 +57,8 @@ class App { torch::Device _torchDevice; FrameEncoder _frameEncoder; + FrameDecoder _frameDecoder; + void nextMap(); // proceed to next map }; diff --git a/src/App.cpp b/src/App.cpp index 868368b..df753c0 100644 --- a/src/App.cpp +++ b/src/App.cpp @@ -85,7 +85,7 @@ App::App() : // Load frame encoder if (fs::exists(frameEncoderFilename)) { - printf("Loading frame encoder model from %s\n", frameEncoderFilename); // TODO logging + printf("App: Loading frame encoder model from %s\n", frameEncoderFilename); // TODO logging serialize::InputArchive inputArchive; inputArchive.load_from(frameEncoderFilename); _frameEncoder->load(inputArchive); @@ -97,6 +97,23 @@ App::App() : } _frameEncoder->to(_torchDevice); + + // Load frame decoder + if (fs::exists(frameDecoderFilename)) { + printf("App: Loading frame encoder model from %s\n", frameDecoderFilename); // TODO logging + serialize::InputArchive inputArchive; + inputArchive.load_from(frameDecoderFilename); + _frameDecoder->load(inputArchive); + // Use the inference mode + _frameDecoder->eval(); + } + else { + printf("No %s found. 
Initializing a new frame encoder model.\n", frameDecoderFilename); // TODO logging + } + + _frameDecoder->to(_torchDevice); + + } App::~App() @@ -180,13 +197,18 @@ void App::loop() // upload to GPU and permute to BCHW torch::Tensor pixelBufferGpu = pixelBuffer.to(_torchDevice); pixelBufferGpu = pixelBufferGpu.permute({0,3,1,2}); - - const auto tempsz = pixelBufferGpu.sizes(); // encode torch::Tensor encoding = _frameEncoder(pixelBufferGpu); - - // TODO: check encoding sanity with decoder + + // Check sanity with decoder + torch::Tensor decoding = _frameDecoder(encoding); + decoding = decoding.permute({0,2,3,1}).contiguous(); + + cv::Mat decodingOpencv(480, 640, CV_32FC4); + copyFromTensor(decoding.to(torch::kCPU), (float*)decodingOpencv.ptr(0), 640*480*4); + + cv::imshow("app-decoding", decodingOpencv); // store encoding to the sequence storage copyFromTensor(encoding.to(torch::kCPU), batch.encodings[_batchEntryId], encodingLength); @@ -247,6 +269,7 @@ void App::loop() } // Train +#if 0 if (_newPatchReady) { // Create copy of the sequence storage auto sequenceStorageCopy(_sequenceStorage); @@ -256,7 +279,7 @@ void App::loop() _model.trainAsync(std::move(sequenceStorageCopy)); _newPatchReady = false; } - +#endif ++_frameId; } } From a3d39f254879429c2b7596fa63c4e21490e4d109 Mon Sep 17 00:00:00 2001 From: Lehdari Date: Sat, 14 Jan 2023 18:34:08 +0200 Subject: [PATCH 06/12] SequenceStorage: Add reset() --- include/SequenceStorage.hpp | 3 +++ src/SequenceStorage.cpp | 11 +++++++++++ 2 files changed, 14 insertions(+) diff --git a/include/SequenceStorage.hpp b/include/SequenceStorage.hpp index 7f88bdc..c9277af 100644 --- a/include/SequenceStorage.hpp +++ b/include/SequenceStorage.hpp @@ -97,6 +97,9 @@ class SequenceStorage { const Settings& settings() const noexcept; + // Reinitialize all data to default values (0 and such) + void reset(); + private: Settings _settings; uint64_t _frameSize; // size of a frame in elements diff --git a/src/SequenceStorage.cpp b/src/SequenceStorage.cpp index e46e5fd..18765d0 100644 --- a/src/SequenceStorage.cpp +++ b/src/SequenceStorage.cpp @@ -213,3 +213,14 @@ const SequenceStorage::Settings& SequenceStorage::settings() const noexcept { return _settings; } + +void SequenceStorage::reset() +{ + for (auto& action : _actions) { + action = gvizdoom::Action(); + } + + std::fill(_frameData.begin(), _frameData.end(), 0.0f); + std::fill(_encodingData.begin(), _encodingData.end(), 0.0f); + std::fill(_rewards.begin(), _rewards.end(), 0.0); +} From 8208aa8da839e18eb83aa052d0b7739927c8e83a Mon Sep 17 00:00:00 2001 From: Eljas Hyyrynen Date: Sat, 14 Jan 2023 18:26:31 +0200 Subject: [PATCH 07/12] App: add RO5 --- include/App.hpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/include/App.hpp b/include/App.hpp index 469cc9c..58c7524 100644 --- a/include/App.hpp +++ b/include/App.hpp @@ -27,7 +27,11 @@ class App { public: App(); - // TODO RO5 + App(const App&) = delete; + App(App&&) = delete; + App& operator=(const App&) = delete; + App& operator=(App&&) = delete; + ~App(); void loop(); From bcfd4606b7dc1e229619f74ebfa5c8c64d623a37 Mon Sep 17 00:00:00 2001 From: Eljas Hyyrynen Date: Sat, 14 Jan 2023 18:27:15 +0200 Subject: [PATCH 08/12] RewardModelTrainer: create a training routine for reward model --- .gitignore | 3 +++ CMakeLists.txt | 1 + include/App.hpp | 10 ++++++---- include/ModelProto.hpp | 2 ++ include/RewardModelTrainer.hpp | 18 ++++++++++++++++++ src/App.cpp | 23 ++++++++++++++--------- src/RewardModelTrainer.cpp | 0 7 files changed, 44 
insertions(+), 13 deletions(-) create mode 100644 include/RewardModelTrainer.hpp create mode 100644 src/RewardModelTrainer.cpp diff --git a/.gitignore b/.gitignore index 9c5fd58..8cc2bb3 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,6 @@ cmake-build-*/ *.code-workspace *.pk3 src/gitinfo.h +*.pt +models/ +build/ diff --git a/CMakeLists.txt b/CMakeLists.txt index c9f7eb7..d30db77 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -33,6 +33,7 @@ set(DOOT2_SOURCES src/HeatmapActionModule.cpp src/ModelProto.cpp src/ResNeXtModule.cpp + src/RewardModelTrainer.cpp src/SequenceStorage.cpp ) diff --git a/include/App.hpp b/include/App.hpp index 58c7524..e63de8e 100644 --- a/include/App.hpp +++ b/include/App.hpp @@ -12,10 +12,11 @@ #include "ActionManager.hpp" -#include "HeatmapActionModule.hpp" #include "DoorTraversalActionModule.hpp" -#include "SequenceStorage.hpp" +#include "HeatmapActionModule.hpp" #include "ModelProto.hpp" +#include "RewardModelTrainer.hpp" +#include "SequenceStorage.hpp" #include #include @@ -57,12 +58,13 @@ class App { size_t _batchEntryId; bool _newPatchReady; - ModelProto _model; + ModelProto _modelEdec; + RewardModelTrainer _modelReward; torch::Device _torchDevice; FrameEncoder _frameEncoder; FrameDecoder _frameDecoder; - + bool _trainRewardModel; void nextMap(); // proceed to next map }; diff --git a/include/ModelProto.hpp b/include/ModelProto.hpp index 23fb450..91ace12 100644 --- a/include/ModelProto.hpp +++ b/include/ModelProto.hpp @@ -14,11 +14,13 @@ #include "FrameDecoder.hpp" #include "FlowDecoder.hpp" + #include #include #include #include #include +#include class SequenceStorage; diff --git a/include/RewardModelTrainer.hpp b/include/RewardModelTrainer.hpp new file mode 100644 index 0000000..9f627bf --- /dev/null +++ b/include/RewardModelTrainer.hpp @@ -0,0 +1,18 @@ +#pragma once + +#include + +class SequenceStorage; + +class RewardModelTrainer { +public: + RewardModelTrainer(); + RewardModelTrainer(const RewardModelTrainer&) = delete; + RewardModelTrainer(RewardModelTrainer&&) = delete; + RewardModelTrainer& operator=(const RewardModelTrainer&) = delete; + RewardModelTrainer& operator=(RewardModelTrainer&&) = delete; + + void train(SequenceStorage& storage); +private: + torch::optim::Adam _optimizer; +} \ No newline at end of file diff --git a/src/App.cpp b/src/App.cpp index df753c0..13a6728 100644 --- a/src/App.cpp +++ b/src/App.cpp @@ -43,7 +43,8 @@ App::App() : _frameId (0), _batchEntryId (0), _newPatchReady (false), - _torchDevice (torch::cuda::is_available() ? kCUDA : kCPU) + _torchDevice (torch::cuda::is_available() ? 
kCUDA : kCPU), + _trainRewardModel (true) { auto& doomGame = DoomGame::instance(); @@ -269,17 +270,21 @@ void App::loop() } // Train -#if 0 if (_newPatchReady) { - // Create copy of the sequence storage - auto sequenceStorageCopy(_sequenceStorage); - - printf("Training...\n"); - _model.waitForTrainingFinish(); - _model.trainAsync(std::move(sequenceStorageCopy)); + if (_trainRewardModel) { + // asd + // + } else { + // Create copy of the sequence storage + auto sequenceStorageCopy(_sequenceStorage); + + printf("Training...\n"); + _modelEdec.waitForTrainingFinish(); + _modelEdec.trainAsync(std::move(sequenceStorageCopy)); + } _newPatchReady = false; } -#endif + ++_frameId; } } diff --git a/src/RewardModelTrainer.cpp b/src/RewardModelTrainer.cpp new file mode 100644 index 0000000..e69de29 From 147b7b2c5be81d4281595c2d53c7e26d4c91c384 Mon Sep 17 00:00:00 2001 From: Eljas Hyyrynen Date: Sat, 14 Jan 2023 20:52:14 +0200 Subject: [PATCH 09/12] RewardModelTrainer: fill tensor with data --- CMakeLists.txt | 7 ++-- include/RewardModel.hpp | 13 +++++++ include/RewardModelTrainer.hpp | 10 ++++-- include/SequenceStorage.hpp | 7 ++++ src/App.cpp | 3 +- src/RewardModel.cpp | 14 ++++++++ src/RewardModelTrainer.cpp | 65 ++++++++++++++++++++++++++++++++++ src/SequenceStorage.cpp | 18 ++++++++++ 8 files changed, 130 insertions(+), 7 deletions(-) create mode 100644 include/RewardModel.hpp create mode 100644 src/RewardModel.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index d30db77..dd989f7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,9 +3,9 @@ project(DooT2) # required for env with a RTX 4090 -set(CMAKE_CUDA_ARCHITECTURES 89) -set(CMAKE_CUDA_COMPILER /usr/local/cuda/bin/nvcc) -set(CUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-11.8) +# set(CMAKE_CUDA_ARCHITECTURES 89) +# set(CMAKE_CUDA_COMPILER /usr/local/cuda/bin/nvcc) +# set(CUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-11.8) # Add external dependencies @@ -33,6 +33,7 @@ set(DOOT2_SOURCES src/HeatmapActionModule.cpp src/ModelProto.cpp src/ResNeXtModule.cpp + src/RewardModel.cpp src/RewardModelTrainer.cpp src/SequenceStorage.cpp ) diff --git a/include/RewardModel.hpp b/include/RewardModel.hpp new file mode 100644 index 0000000..96eb7e2 --- /dev/null +++ b/include/RewardModel.hpp @@ -0,0 +1,13 @@ +#pragma once + +#include + +class RewardModelImpl : public torch::nn::Module { +public: + RewardModelImpl(); + torch::Tensor forward(torch::Tensor x); +private: +// layers here + torch::nn::Conv2d _conv1;//tmp +}; +TORCH_MODULE(RewardModel); diff --git a/include/RewardModelTrainer.hpp b/include/RewardModelTrainer.hpp index 9f627bf..ebbc78d 100644 --- a/include/RewardModelTrainer.hpp +++ b/include/RewardModelTrainer.hpp @@ -1,5 +1,8 @@ #pragma once +#include "ActionConverter.hpp" +#include "RewardModel.hpp" + #include class SequenceStorage; @@ -14,5 +17,8 @@ class RewardModelTrainer { void train(SequenceStorage& storage); private: - torch::optim::Adam _optimizer; -} \ No newline at end of file + RewardModel _rewardModel; + torch::optim::Adam _optimizer; + float _learningRate{1e-3}; + ActionConverter _actionConverter; +}; \ No newline at end of file diff --git a/include/SequenceStorage.hpp b/include/SequenceStorage.hpp index c9277af..c214825 100644 --- a/include/SequenceStorage.hpp +++ b/include/SequenceStorage.hpp @@ -44,6 +44,9 @@ class SequenceStorage { // Map encoding data to a torch tensor (BW) const torch::Tensor mapEncodingData(); + // Map rewards to a torch tensor (B) + const torch::Tensor mapRewards(); + friend class SequenceStorage; private: @@ -58,6 +61,7 @@ 
class SequenceStorage { float* const _frameData; float* const _encodingData; + const SequenceStorage::Settings& _settings; }; @@ -95,6 +99,9 @@ class SequenceStorage { // Map encoding data to a torch tensor (LBW) const torch::Tensor mapEncodingData(); + // Map rewards to a torch tensor (LB) + const torch::Tensor mapRewards(); + const Settings& settings() const noexcept; // Reinitialize all data to default values (0 and such) diff --git a/src/App.cpp b/src/App.cpp index 13a6728..b40bf91 100644 --- a/src/App.cpp +++ b/src/App.cpp @@ -272,8 +272,7 @@ void App::loop() // Train if (_newPatchReady) { if (_trainRewardModel) { - // asd - // + _modelReward.train(_sequenceStorage); } else { // Create copy of the sequence storage auto sequenceStorageCopy(_sequenceStorage); diff --git a/src/RewardModel.cpp b/src/RewardModel.cpp new file mode 100644 index 0000000..279d2cf --- /dev/null +++ b/src/RewardModel.cpp @@ -0,0 +1,14 @@ +#include "RewardModel.hpp" + +using namespace torch; + +RewardModelImpl::RewardModelImpl() : +_conv1 (nn::Conv2dOptions(4, 16, {4, 4}).stride({2, 2}).bias(false).padding(1)) /*tmp*/ +{ + register_module("conv1", _conv1); +} + +torch::Tensor RewardModelImpl::forward(torch::Tensor x) +{ + return _conv1(x); //tmp +} \ No newline at end of file diff --git a/src/RewardModelTrainer.cpp b/src/RewardModelTrainer.cpp index e69de29..4ea415b 100644 --- a/src/RewardModelTrainer.cpp +++ b/src/RewardModelTrainer.cpp @@ -0,0 +1,65 @@ +#include "RewardModelTrainer.hpp" + +#include "Constants.hpp" +#include "SequenceStorage.hpp" + +using namespace doot2; +using namespace gvizdoom; + + +RewardModelTrainer::RewardModelTrainer() : + _optimizer( + {_rewardModel->parameters()}, + torch::optim::AdamOptions(_learningRate).betas({0.9, 0.999}) + ) +{ + // TODO load reward model from file if it is not found on the disk + + printf("Reward model constructor\n"); // tmp + + _actionConverter.setAngleIndex(0); + _actionConverter.setKeyIndex(1, Action::Key::ACTION_FORWARD); + _actionConverter.setKeyIndex(2, Action::Key::ACTION_BACK); + _actionConverter.setKeyIndex(3, Action::Key::ACTION_LEFT); + _actionConverter.setKeyIndex(4, Action::Key::ACTION_RIGHT); + _actionConverter.setKeyIndex(5, Action::Key::ACTION_USE); + +} + +void RewardModelTrainer::train(SequenceStorage& storage) +{ + torch::Tensor encodings = storage.mapEncodingData(); // LBW + + torch::Tensor actions(torch::zeros( + {static_cast(storage.settings().length), + storage.settings().batchSize, + actionVectorLength})); // LBW where W=6 + + torch::Tensor rewards = storage.mapRewards().toType(torch::kF32); // LB + + auto* actionsPtr = actions.data_ptr(); + + for (size_t t = 0; t < storage.settings().length; ++t) { + for (size_t bi = 0; bi < storage.settings().batchSize; ++bi) { + auto batch = storage[bi]; + auto action = batch.actions[t]; + auto actionVector = _actionConverter(action, actionVectorLength); + + std::copy(actionVector.begin(), actionVector.end(), + actionsPtr + t*storage.settings().batchSize + bi); + + } + } + +#if 0 + // Temp: perform one training step + { + _rewardModel->zero_grad(); + + torch::Tensor y = _rewardModel->forward(); + torch::Tensor loss = torch::l2_loss(y,target); + loss.backward(); + _optimizer.step(); + } +#endif +} diff --git a/src/SequenceStorage.cpp b/src/SequenceStorage.cpp index 18765d0..b100626 100644 --- a/src/SequenceStorage.cpp +++ b/src/SequenceStorage.cpp @@ -61,6 +61,15 @@ const torch::Tensor SequenceStorage::BatchHandle::mapEncodingData() } } +const torch::Tensor SequenceStorage::BatchHandle::mapRewards() +{ 
+ return torch::from_blob( + rewards, + { _settings.batchSize }, + torch::TensorOptions().device(torch::kCPU).dtype(torch::kF64) + ); +} + SequenceStorage::ConstBatchHandle::ConstBatchHandle( const gvizdoom::Action* actions, const Image* frames, @@ -209,6 +218,15 @@ const torch::Tensor SequenceStorage::mapEncodingData() } } +const torch::Tensor SequenceStorage::mapRewards() +{ + return torch::from_blob( + _rewards.data(), + { _settings.length, _settings.batchSize }, + torch::TensorOptions().device(torch::kCPU).dtype(torch::kF64) + ); +} + const SequenceStorage::Settings& SequenceStorage::settings() const noexcept { return _settings; From 1c983a8c4b0d065014949e738d9d15aca89343ff Mon Sep 17 00:00:00 2001 From: Eljas Hyyrynen Date: Sun, 15 Jan 2023 00:07:48 +0200 Subject: [PATCH 10/12] RewardModelTrainer: implement LSTM inference --- include/RewardModel.hpp | 11 ++++++++--- src/App.cpp | 4 ++-- src/RewardModel.cpp | 34 +++++++++++++++++++++++++++++----- src/RewardModelTrainer.cpp | 16 +++++++++------- 4 files changed, 48 insertions(+), 17 deletions(-) diff --git a/include/RewardModel.hpp b/include/RewardModel.hpp index 96eb7e2..a1b41f2 100644 --- a/include/RewardModel.hpp +++ b/include/RewardModel.hpp @@ -1,13 +1,18 @@ #pragma once #include +#include class RewardModelImpl : public torch::nn::Module { public: RewardModelImpl(); - torch::Tensor forward(torch::Tensor x); + torch::Tensor forward( + torch::Tensor encodings, + torch::Tensor actions, + torch::Tensor rewards); private: -// layers here - torch::nn::Conv2d _conv1;//tmp + const int64_t _inputSize; + const int64_t _hiddenSize; + torch::nn::LSTM _lstm; }; TORCH_MODULE(RewardModel); diff --git a/src/App.cpp b/src/App.cpp index b40bf91..3da7cd2 100644 --- a/src/App.cpp +++ b/src/App.cpp @@ -135,7 +135,7 @@ void App::loop() Vec2f playerPosScreen(0.0f, 0.0f); - size_t recordBeginFrameId = 768+_rnd()%512; + size_t recordBeginFrameId = 16+_rnd()%16; size_t recordEndFrameId = recordBeginFrameId+64; // BHWC @@ -163,7 +163,7 @@ void App::loop() // Update the game state, restart if required if (_frameId >= recordEndFrameId || doomGame.update(action)) { nextMap(); - recordBeginFrameId = 768+_rnd()%512; + recordBeginFrameId = 16+_rnd()%16; recordEndFrameId = recordBeginFrameId+64; continue; } diff --git a/src/RewardModel.cpp b/src/RewardModel.cpp index 279d2cf..853eba5 100644 --- a/src/RewardModel.cpp +++ b/src/RewardModel.cpp @@ -1,14 +1,38 @@ #include "RewardModel.hpp" +#include "Constants.hpp" + +#include + +using namespace doot2; using namespace torch; RewardModelImpl::RewardModelImpl() : -_conv1 (nn::Conv2dOptions(4, 16, {4, 4}).stride({2, 2}).bias(false).padding(1)) /*tmp*/ + _inputSize(actionVectorLength + encodingLength), + _hiddenSize(encodingLength + 1), + _lstm(_inputSize, _hiddenSize) { - register_module("conv1", _conv1); + register_module("lstm", _lstm); } -torch::Tensor RewardModelImpl::forward(torch::Tensor x) +// Action at time step t takes us to state t+1 and gives reward t+1 +torch::Tensor RewardModelImpl::forward( + torch::Tensor encodings, /*LBW*/ + torch::Tensor actions, /*LBW*/ + torch::Tensor rewards /*LB*/ +) { - return _conv1(x); //tmp -} \ No newline at end of file + // Input: encoding and action + // Output: encoding and reward + + torch::Tensor input = torch::cat({encodings, actions}, 2); + + // (output), (h_n, c_n) + std::tuple> outputRaw = _lstm(input); + torch::Tensor output = std::get<0>(outputRaw); + + // 64 16 2049 0 + // printf("Output sizes: %ld %ld %ld %ld\n", output.sizes()[0], output.sizes()[1], 
output.sizes()[2], output.sizes()[3]); + + return output; +} diff --git a/src/RewardModelTrainer.cpp b/src/RewardModelTrainer.cpp index 4ea415b..d6689b8 100644 --- a/src/RewardModelTrainer.cpp +++ b/src/RewardModelTrainer.cpp @@ -28,8 +28,12 @@ RewardModelTrainer::RewardModelTrainer() : void RewardModelTrainer::train(SequenceStorage& storage) { + using namespace torch::indexing; torch::Tensor encodings = storage.mapEncodingData(); // LBW - + torch::Tensor encodingsShifted{torch::zeros({storage.settings().length, storage.settings().batchSize, encodingLength})}; + encodingsShifted.index({Slice(1, None), Slice(), Slice()}) = encodings.index({Slice(None, -1), Slice(), Slice()}); + + torch::Tensor actions(torch::zeros( {static_cast(storage.settings().length), storage.settings().batchSize, @@ -51,15 +55,13 @@ void RewardModelTrainer::train(SequenceStorage& storage) } } -#if 0 // Temp: perform one training step { _rewardModel->zero_grad(); - torch::Tensor y = _rewardModel->forward(); - torch::Tensor loss = torch::l2_loss(y,target); - loss.backward(); - _optimizer.step(); + torch::Tensor y = _rewardModel->forward(encodings, actions, rewards); + // torch::Tensor loss = torch::l2_loss(y,target); + // loss.backward(); + // _optimizer.step(); } -#endif } From 38caa763e7370311e02b059248271910269f028a Mon Sep 17 00:00:00 2001 From: Eljas Hyyrynen Date: Sun, 15 Jan 2023 00:34:29 +0200 Subject: [PATCH 11/12] RewardModelTrainer: implement loss for LSTM --- include/TensorUtils.hpp | 6 ++++++ src/App.cpp | 3 ++- src/RewardModelTrainer.cpp | 36 +++++++++++++++++++++++++++++++++--- 3 files changed, 41 insertions(+), 4 deletions(-) diff --git a/include/TensorUtils.hpp b/include/TensorUtils.hpp index 2d3e664..ce9a2fe 100644 --- a/include/TensorUtils.hpp +++ b/include/TensorUtils.hpp @@ -183,3 +183,9 @@ INLINE void copyToTensor(const std::vector& vector, torch::Tensor& tenso memcpy(tensor.data_ptr(), vector.data(), vector.size()*sizeof(T_Data)); } } + +INLINE void printTensor(const torch::Tensor& t, const std::string& msg = std::string()) { + printf("%s: %ld %ld %ld %ld\n", + msg.c_str(), + t.sizes()[0], t.sizes()[1], t.sizes()[2], t.sizes()[3]); +} \ No newline at end of file diff --git a/src/App.cpp b/src/App.cpp index 3da7cd2..9713498 100644 --- a/src/App.cpp +++ b/src/App.cpp @@ -203,6 +203,7 @@ void App::loop() torch::Tensor encoding = _frameEncoder(pixelBufferGpu); // Check sanity with decoder +#if CHECK_SANITY_DECODER torch::Tensor decoding = _frameDecoder(encoding); decoding = decoding.permute({0,2,3,1}).contiguous(); @@ -210,7 +211,7 @@ void App::loop() copyFromTensor(decoding.to(torch::kCPU), (float*)decodingOpencv.ptr(0), 640*480*4); cv::imshow("app-decoding", decodingOpencv); - +#endif // store encoding to the sequence storage copyFromTensor(encoding.to(torch::kCPU), batch.encodings[_batchEntryId], encodingLength); diff --git a/src/RewardModelTrainer.cpp b/src/RewardModelTrainer.cpp index d6689b8..bf332bf 100644 --- a/src/RewardModelTrainer.cpp +++ b/src/RewardModelTrainer.cpp @@ -40,6 +40,8 @@ void RewardModelTrainer::train(SequenceStorage& storage) actionVectorLength})); // LBW where W=6 torch::Tensor rewards = storage.mapRewards().toType(torch::kF32); // LB + rewards = torch::unsqueeze(rewards, 2); + auto* actionsPtr = actions.data_ptr(); @@ -59,9 +61,37 @@ void RewardModelTrainer::train(SequenceStorage& storage) { _rewardModel->zero_grad(); + // Size: seqLen x batchSize x (encodingLen + 1) torch::Tensor y = _rewardModel->forward(encodings, actions, rewards); - // torch::Tensor loss = 
torch::l2_loss(y,target); - // loss.backward(); - // _optimizer.step(); + + torch::Tensor yEncodings = y.index({Slice(), Slice(), Slice(None, -1)}); + torch::Tensor yRewards = y.index({Slice(), Slice(), Slice(-1)}); + + printTensor(yEncodings, "yenc"); + printTensor(encodings, "enc"); + + printTensor(yRewards, "yrew"); + printTensor(rewards, "rew"); + + // yenc: 64 16 2048 0 + // enc: 64 16 2048 0 + + // yrew: 64 16 1 0 + // rew: 64 16 0 0 + + auto lossEnc = torch::mse_loss(yEncodings, encodings); + printTensor(lossEnc, "lossEnc"); + auto lossReward = torch::mse_loss(yRewards, rewards); + printTensor(lossReward, "lossReward"); + + auto loss = lossEnc + lossReward; + + printf("Loss: %.5f + %.5f = %.5f\n", + lossEnc.item(), + lossReward.item(), + loss.item()); + + loss.backward(); + _optimizer.step(); } } From 6216361d85b806bf8967ba50e44984f4f57c720b Mon Sep 17 00:00:00 2001 From: Eljas Hyyrynen Date: Tue, 17 Jan 2023 20:34:35 +0200 Subject: [PATCH 12/12] fix leftover merge conflict --- src/App.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/App.cpp b/src/App.cpp index 9713498..311253d 100644 --- a/src/App.cpp +++ b/src/App.cpp @@ -16,10 +16,6 @@ #include "Constants.hpp" -<<<<<<< HEAD -using namespace doot2; -using namespace gvizdoom; -======= using namespace doot2; using namespace gvizdoom; @@ -27,7 +23,6 @@ using namespace torch; using namespace torch::indexing; namespace fs = std::filesystem; ->>>>>>> add encodings and heatmap-based rewards to sequence storage App::App() : _rnd (1507715517),
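Note on patches 09-11: patch 10 computes encodingsShifted in RewardModelTrainer::train(), but the loss added in patch 11 compares the LSTM output against the unshifted encodings and rewards, so the encoding target at step t is the same encoding that was fed in at step t. If the intent is the one described in RewardModel.cpp ("action at time step t takes us to state t+1 and gives reward t+1"), the targets would be shifted by one step. Below is a minimal sketch of such a training step, not part of the series; the helper name rewardModelStep is invented for illustration, while the tensor shapes (encodings LBW, actions LBW, rewards LB1) and the calls used follow the patches above.

#include <torch/torch.h>
#include "RewardModel.hpp"

// Sketch only: one teacher-forced step in which the model sees (encoding, action)
// at steps 0..L-2 and is trained to predict (encoding, reward) at steps 1..L-1.
torch::Tensor rewardModelStep(
    RewardModel& model,
    torch::optim::Adam& optimizer,
    const torch::Tensor& encodings, // LBW, W = encodingLength
    const torch::Tensor& actions,   // LBW, W = actionVectorLength
    const torch::Tensor& rewards)   // LB1
{
    using namespace torch::indexing;

    // Inputs are steps 0..L-2, targets are steps 1..L-1.
    torch::Tensor inEncodings  = encodings.index({Slice(None, -1)});
    torch::Tensor inActions    = actions.index({Slice(None, -1)});
    torch::Tensor tgtEncodings = encodings.index({Slice(1, None)});
    torch::Tensor tgtRewards   = rewards.index({Slice(1, None)});

    model->zero_grad();
    torch::Tensor y = model->forward(inEncodings, inActions, tgtRewards);

    // Last channel of the LSTM output is the predicted reward,
    // the remaining channels are the predicted next encoding.
    torch::Tensor yEncodings = y.index({Slice(), Slice(), Slice(None, -1)});
    torch::Tensor yRewards   = y.index({Slice(), Slice(), Slice(-1, None)});

    torch::Tensor loss = torch::mse_loss(yEncodings, tgtEncodings)
                       + torch::mse_loss(yRewards, tgtRewards);
    loss.backward();
    optimizer.step();
    return loss;
}

The one-step shift mirrors the comment in RewardModel.cpp; without it the encoding part of the loss can be driven to zero by simply copying the input encoding through the LSTM.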