Lehdari · Hyrtsi · Jan 14, 2023 · Jan 14, 2023 · Jan 14, 2023 · Jan 14, 2023
diff --git a/.gitignore b/.gitignore
@@ -1,2 +1,8 @@
 .idea/
 cmake-build-*/
+*.code-workspace
+*.pk3
+src/gitinfo.h
+*.pt
+models/
+build/
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -3,9 +3,9 @@ project(DooT2)
 
 
 # required for env with a RTX 4090
-set(CMAKE_CUDA_ARCHITECTURES 89)
-set(CMAKE_CUDA_COMPILER /usr/local/cuda/bin/nvcc)
-set(CUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-11.8)
+# set(CMAKE_CUDA_ARCHITECTURES 89)
+# set(CMAKE_CUDA_COMPILER /usr/local/cuda/bin/nvcc)
+# set(CUDA_TOOLKIT_ROOT_DIR /usr/local/cuda-11.8)
 
 
 # Add external dependencies
@@ -33,6 +33,8 @@ set(DOOT2_SOURCES
     src/HeatmapActionModule.cpp
     src/ModelProto.cpp
     src/ResNeXtModule.cpp
+    src/RewardModel.cpp
+    src/RewardModelTrainer.cpp
     src/SequenceStorage.cpp
 )
 

diff --git a/include/App.hpp b/include/App.hpp
@@ -12,10 +12,11 @@
 
 
 #include "ActionManager.hpp"
-#include "HeatmapActionModule.hpp"
 #include "DoorTraversalActionModule.hpp"
-#include "SequenceStorage.hpp"
+#include "HeatmapActionModule.hpp"
 #include "ModelProto.hpp"
+#include "RewardModelTrainer.hpp"
+#include "SequenceStorage.hpp"
 
 #include <SDL.h>
 #include <opencv2/core/mat.hpp>
@@ -27,7 +28,11 @@
 class App {
 public:
     App();
-    // TODO RO5
+    App(const App&) = delete;
+    App(App&&) = delete;
+    App& operator=(const App&) = delete;
+    App& operator=(App&&) = delete;
+
     ~App();
 
     void loop();
@@ -53,8 +58,13 @@ class App {
     size_t                      _batchEntryId;
     bool                        _newPatchReady;
 
-    ModelProto                  _model;
-
+    ModelProto                  _modelEdec;
+    RewardModelTrainer          _modelReward;
 
+    torch::Device               _torchDevice;
+    FrameEncoder                _frameEncoder;
+    FrameDecoder                _frameDecoder;
+    bool                        _trainRewardModel;
+
     void nextMap(); // proceed to next map
 };
diff --git a/include/ModelProto.hpp b/include/ModelProto.hpp
@@ -14,11 +14,13 @@
 #include "FrameDecoder.hpp"
 #include "FlowDecoder.hpp"
 
+
 #include <vector>
 #include <memory>
 #include <atomic>
 #include <mutex>
 #include <thread>
+#include <torch/torch.h>
 
 
 class SequenceStorage;

diff --git a/include/RewardModel.hpp b/include/RewardModel.hpp
@@ -0,0 +1,25 @@
+#pragma once
+
+#include <torch/torch.h>
+#include <torch/nn/modules/rnn.h>
+
+// Reward model network; holds a single LSTM layer as its only submodule member.
+// NOTE(review): layer sizes and the exact role of each forward() input are defined in
+// src/RewardModel.cpp, which is not visible from this header — confirm there.
+class RewardModelImpl : public torch::nn::Module {
+public:
+    RewardModelImpl();
+    // Forward pass taking encodings, actions and rewards as tensors and returning a tensor.
+    // NOTE(review): expected shapes / dtypes are not stated in this header — presumably
+    // sequence-major like the tensors mapped by SequenceStorage; TODO confirm.
+    torch::Tensor forward(
+        torch::Tensor encodings,
+        torch::Tensor actions,
+        torch::Tensor rewards);
+private:
+    const int64_t _inputSize;   // presumably the LSTM input feature size — confirm in the .cpp
+    const int64_t _hiddenSize;  // presumably the LSTM hidden state size — confirm in the .cpp
+    torch::nn::LSTM             _lstm;
+};
+// Declares `RewardModel`, the module holder type wrapping RewardModelImpl
+TORCH_MODULE(RewardModel);
diff --git a/include/RewardModelTrainer.hpp b/include/RewardModelTrainer.hpp
@@ -0,0 +1,32 @@
+#pragma once
+
+#include "ActionConverter.hpp"
+#include "RewardModel.hpp"
+
+#include <torch/torch.h>
+
+class SequenceStorage;
+
+// Bundles a RewardModel with an Adam optimizer, a learning rate and an action converter.
+// Instances are neither copyable nor movable (all four operations are deleted).
+class RewardModelTrainer {
+public:
+    RewardModelTrainer();
+    RewardModelTrainer(const RewardModelTrainer&) = delete;
+    RewardModelTrainer(RewardModelTrainer&&) = delete;
+    RewardModelTrainer& operator=(const RewardModelTrainer&) = delete;
+    RewardModelTrainer& operator=(RewardModelTrainer&&) = delete;
+
+    // Presumably runs reward model training on the sequences held in `storage`.
+    // NOTE(review): the implementation lives in src/RewardModelTrainer.cpp (not visible here);
+    // whether `storage` is modified is not established by this header.
+    void train(SequenceStorage& storage);
+private:
+    RewardModel                 _rewardModel;
+    // NOTE(review): members are initialized in declaration order, so _optimizer is constructed
+    // before _learningRate receives its default value; verify the constructor does not read
+    // _learningRate while building the optimizer.
+    torch::optim::Adam          _optimizer;
+    float                       _learningRate{1e-3};
+    ActionConverter<float>      _actionConverter;
+};
diff --git a/include/SequenceStorage.hpp b/include/SequenceStorage.hpp
@@ -44,6 +44,9 @@ class SequenceStorage {
         // Map encoding data to a torch tensor (BW)
         const torch::Tensor mapEncodingData();
 
+        // Map rewards to a torch tensor (B)
+        const torch::Tensor mapRewards();
+
         friend class SequenceStorage;
 
     private:
@@ -58,6 +61,7 @@ class SequenceStorage {
 
         float* const                        _frameData;
         float* const                        _encodingData;
+
         const SequenceStorage::Settings&    _settings;
     };
 
@@ -95,8 +99,14 @@ class SequenceStorage {
     // Map encoding data to a torch tensor (LBW)
     const torch::Tensor mapEncodingData();
 
+    // Map rewards to a torch tensor (LB)
+    const torch::Tensor mapRewards();
+
     const Settings& settings() const noexcept;
 
+    // Reinitialize all data to default values (0 and such)
+    void reset();
+
 private:
     Settings                        _settings;
     uint64_t                        _frameSize;     // size of a frame in elements

diff --git a/include/TensorUtils.hpp b/include/TensorUtils.hpp
@@ -183,3 +183,15 @@ INLINE void copyToTensor(const std::vector<T_Data>& vector, torch::Tensor& tenso
         memcpy(tensor.data_ptr<T_Data>(), vector.data(), vector.size()*sizeof(T_Data));
     }
 }
+
+// Print the label `msg` followed by the size of every dimension of `t` on one line,
+// e.g. "frame: 1 4 480 640". Works for tensors of any rank (a 0-dim tensor prints
+// just the label).
+INLINE void printTensor(const torch::Tensor& t, const std::string& msg = std::string()) {
+    printf("%s:", msg.c_str());
+    for (const int64_t dimSize : t.sizes()) {
+        // int64_t is not `long` on every platform; cast so the argument matches %lld
+        printf(" %lld", static_cast<long long>(dimSize));
+    }
+    printf("\n");
+}
diff --git a/src/App.cpp b/src/App.cpp
@@ -9,15 +9,20 @@
 //
 
 #include "App.hpp"
-
+#include "Constants.hpp"
+#include <filesystem>
 #include "gvizdoom/DoomGame.hpp"
-
 #include <opencv2/highgui.hpp>
 
 #include "Constants.hpp"
 
+
 using namespace doot2;
 using namespace gvizdoom;
+using namespace torch;
+using namespace torch::indexing;
+namespace fs = std::filesystem;
+
 
 App::App() :
     _rnd                        (1507715517),
@@ -27,12 +32,14 @@ App::App() :
     _quit                       (false),
     _heatmapActionModule        (HeatmapActionModule::Settings{256, 32.0f}),
     _doorTraversalActionModule  (false),
-    _sequenceStorage            (SequenceStorage::Settings{batchSize, sequenceLength, true, false, frameWidth, frameHeight, ImageFormat::BGRA}),
+    _sequenceStorage            (SequenceStorage::Settings{batchSize, sequenceLength, false, true, 0, 0, ImageFormat::BGRA, encodingLength}),
     _positionPlot               (1024, 1024, CV_32FC3, cv::Scalar(0.0f)),
     _initPlayerPos              (0.0f, 0.0f),
     _frameId                    (0),
     _batchEntryId               (0),
-    _newPatchReady              (false)
+    _newPatchReady              (false),
+    _torchDevice                (torch::cuda::is_available() ? kCUDA : kCPU),
+    _trainRewardModel           (true)
 {
     auto& doomGame = DoomGame::instance();
 
@@ -71,6 +78,38 @@ App::App() :
     // Setup ActionManager
     _actionManager.addModule(&_doorTraversalActionModule);
     _actionManager.addModule(&_heatmapActionModule);
+
+    // Load frame encoder
+    if (fs::exists(frameEncoderFilename)) {
+        printf("App: Loading frame encoder model from %s\n", frameEncoderFilename); // TODO logging
+        serialize::InputArchive inputArchive;
+        inputArchive.load_from(frameEncoderFilename);
+        _frameEncoder->load(inputArchive);
+        // Use the inference mode
+        _frameEncoder->eval();
+    }
+    else {
+        printf("No %s found. Initializing a new frame encoder model.\n", frameEncoderFilename); // TODO logging
+    }
+
+    _frameEncoder->to(_torchDevice);
+
+    // Load frame decoder weights from disk when a saved model file exists
+    if (fs::exists(frameDecoderFilename)) {
+        printf("App: Loading frame decoder model from %s\n", frameDecoderFilename); // TODO logging
+        serialize::InputArchive inputArchive;
+        inputArchive.load_from(frameDecoderFilename);
+        _frameDecoder->load(inputArchive);
+        // Use the inference mode
+        _frameDecoder->eval();
+    }
+    else {
+        printf("No %s found. Initializing a new frame decoder model.\n", frameDecoderFilename); // TODO logging
+    }
+
+    _frameDecoder->to(_torchDevice);
+
+
 }
 
 App::~App()
@@ -91,9 +130,12 @@ void App::loop()
 
     Vec2f playerPosScreen(0.0f, 0.0f);
 
-    size_t recordBeginFrameId = 768+_rnd()%512;
+    size_t recordBeginFrameId = 16+_rnd()%16;
     size_t recordEndFrameId = recordBeginFrameId+64;
 
+    // BHWC
+    torch::Tensor pixelBuffer{torch::zeros({1, 480, 640, 4})};
+
     while (!_quit) {
         while(SDL_PollEvent(&event)) {
             if (event.type == SDL_QUIT ||
@@ -116,7 +158,7 @@ void App::loop()
         // Update the game state, restart if required
         if (_frameId >= recordEndFrameId || doomGame.update(action)) {
             nextMap();
-            recordBeginFrameId = 768+_rnd()%512;
+            recordBeginFrameId = 16+_rnd()%16;
             recordEndFrameId = recordBeginFrameId+64;
             continue;
         }
@@ -136,10 +178,44 @@ void App::loop()
             auto recordFrameId = _frameId - recordBeginFrameId;
             auto batch = _sequenceStorage[recordFrameId];
             batch.actions[_batchEntryId] = action;
-            Image<uint8_t> frame(doomGame.getScreenWidth(), doomGame.getScreenHeight(), ImageFormat::BGRA);
-            frame.copyFrom(doomGame.getPixelsBGRA());
-            convertImage(frame, batch.frames[_batchEntryId]);
-            batch.rewards[_batchEntryId] = 0.0; // TODO no rewards for now
+
+            // Convert the game frame from uint8 to float
+            const auto imageFormat{ImageFormat::BGRA};
+            Image<uint8_t> frameUint8(doomGame.getScreenWidth(), doomGame.getScreenHeight(), imageFormat);
+            Image<float> frameFloat(doomGame.getScreenWidth(), doomGame.getScreenHeight(), imageFormat);
+            frameUint8.copyFrom(doomGame.getPixelsBGRA());
+            convertImage(frameUint8, frameFloat);
+
+            // Copy the float frame to a torch::Tensor
+            const auto nPixels = doomGame.getScreenWidth() * doomGame.getScreenHeight() * getImageFormatNChannels(imageFormat);
+            copyToTensor(frameFloat.data(), nPixels, pixelBuffer);
+
+            // upload to GPU and permute to BCHW
+            torch::Tensor pixelBufferGpu = pixelBuffer.to(_torchDevice);            
+            pixelBufferGpu = pixelBufferGpu.permute({0,3,1,2});
+
+            // encode
+            torch::Tensor encoding = _frameEncoder(pixelBufferGpu);
+
+            // Check sanity with decoder
+#if CHECK_SANITY_DECODER
+            torch::Tensor decoding = _frameDecoder(encoding);
+            decoding = decoding.permute({0,2,3,1}).contiguous();
+
+            cv::Mat decodingOpencv(480, 640, CV_32FC4);
+            copyFromTensor(decoding.to(torch::kCPU), (float*)decodingOpencv.ptr<float>(0), 640*480*4);
+
+            cv::imshow("app-decoding", decodingOpencv);
+#endif
+            // store encoding to the sequence storage
+            copyFromTensor(encoding.to(torch::kCPU), batch.encodings[_batchEntryId], encodingLength);
+
+            // Update relative player position
+            playerPosRelative(0) = doomGame.getGameState<GameState::PlayerPos>()(0) - _initPlayerPos(0);
+            playerPosRelative(1) = _initPlayerPos(1) - doomGame.getGameState<GameState::PlayerPos>()(1); // invert y
+
+            // Reward is negative heatmap value
+            batch.rewards[_batchEntryId] = -_heatmapActionModule.sample(playerPosRelative, true);
         }
 
         // Render screen
@@ -191,12 +267,16 @@ void App::loop()
 
         // Train
         if (_newPatchReady) {
-            // Create copy of the sequence storage
-            auto sequenceStorageCopy(_sequenceStorage);
-
-            printf("Training...\n");
-            _model.waitForTrainingFinish();
-            _model.trainAsync(std::move(sequenceStorageCopy));
+            if (_trainRewardModel) {
+                _modelReward.train(_sequenceStorage);
+            } else {
+                // Create copy of the sequence storage
+                auto sequenceStorageCopy(_sequenceStorage);
+
+                printf("Training...\n");
+                _modelEdec.waitForTrainingFinish();
+                _modelEdec.trainAsync(std::move(sequenceStorageCopy));
+            }
             _newPatchReady = false;
         }
 

diff --git a/src/ModelProto.cpp b/src/ModelProto.cpp
@@ -9,6 +9,8 @@
 //
 
 #include "ModelProto.hpp"
+
+#include "Constants.hpp"
 #include "SequenceStorage.hpp"
 
 #include <opencv2/core/mat.hpp> // TODO temp
@@ -23,7 +25,6 @@ static constexpr double     learningRate            = 1.0e-3; // TODO
 static constexpr int64_t    nTrainingEpochs         = 10;
 
 using namespace doot2;
-
 using namespace torch;
 namespace tf = torch::nn::functional;
 namespace fs = std::filesystem;