From 1bd9ae7f29f0bc97c589f99700e214090a6bbc67 Mon Sep 17 00:00:00 2001
From: Teddy Koker
Date: Thu, 26 Nov 2020 13:21:54 -0500
Subject: [PATCH] Upgrade DQN to use .log (#404)

* Upgrade DQN to use .log

* remove unused

* pep8

* fixed other dqn
---
 pl_bolts/models/rl/double_dqn_model.py | 14 ++------------
 pl_bolts/models/rl/dqn_model.py        | 18 ++++--------------
 pl_bolts/models/rl/per_dqn_model.py    | 14 ++------------
 3 files changed, 8 insertions(+), 38 deletions(-)

diff --git a/pl_bolts/models/rl/double_dqn_model.py b/pl_bolts/models/rl/double_dqn_model.py
index 284c328f2d..150ea14dd9 100644
--- a/pl_bolts/models/rl/double_dqn_model.py
+++ b/pl_bolts/models/rl/double_dqn_model.py
@@ -65,27 +65,17 @@ def training_step(self, batch: Tuple[torch.Tensor, torch.Tensor], _) -> OrderedD
         if self.global_step % self.sync_rate == 0:
             self.target_net.load_state_dict(self.net.state_dict())

-        log = {
+        self.log_dict({
             "total_reward": self.total_rewards[-1],
             "avg_reward": self.avg_rewards,
             "train_loss": loss,
             # "episodes": self.total_episode_steps,
-        }
-        status = {
-            "steps": self.global_step,
-            "avg_reward": self.avg_rewards,
-            "total_reward": self.total_rewards[-1],
-            "episodes": self.done_episodes,
-            # "episode_steps": self.episode_steps,
-            "epsilon": self.agent.epsilon,
-        }
+        })

         return OrderedDict(
             {
                 "loss": loss,
                 "avg_reward": self.avg_rewards,
-                "log": log,
-                "progress_bar": status,
             }
         )

diff --git a/pl_bolts/models/rl/dqn_model.py b/pl_bolts/models/rl/dqn_model.py
index bbaffff922..6f2460216e 100644
--- a/pl_bolts/models/rl/dqn_model.py
+++ b/pl_bolts/models/rl/dqn_model.py
@@ -288,28 +288,18 @@ def training_step(self, batch: Tuple[torch.Tensor, torch.Tensor], _) -> OrderedD
         if self.global_step % self.sync_rate == 0:
             self.target_net.load_state_dict(self.net.state_dict())

-        log = {
+        self.log_dict({
             "total_reward": self.total_rewards[-1],
             "avg_reward": self.avg_rewards,
             "train_loss": loss,
             "episodes": self.done_episodes,
             "episode_steps": self.total_episode_steps[-1]
-        }
-        status = {
-            "steps": self.global_step,
-            "avg_reward": self.avg_rewards,
-            "total_reward": self.total_rewards[-1],
-            "episodes": self.done_episodes,
-            "episode_steps": self.total_episode_steps[-1],
-            "epsilon": self.agent.epsilon,
-        }
+        })

         return OrderedDict(
             {
                 "loss": loss,
                 "avg_reward": self.avg_rewards,
-                "log": log,
-                "progress_bar": status,
             }
         )

@@ -323,8 +313,8 @@ def test_epoch_end(self, outputs) -> Dict[str, torch.Tensor]:
         """Log the avg of the test results"""
         rewards = [x["test_reward"] for x in outputs]
         avg_reward = sum(rewards) / len(rewards)
-        tensorboard_logs = {"avg_test_reward": avg_reward}
-        return {"avg_test_reward": avg_reward, "log": tensorboard_logs}
+        self.log("avg_test_reward", avg_reward)
+        return {"avg_test_reward": avg_reward}

     def configure_optimizers(self) -> List[Optimizer]:
         """ Initialize Adam optimizer"""
diff --git a/pl_bolts/models/rl/per_dqn_model.py b/pl_bolts/models/rl/per_dqn_model.py
index 69fe61bbe9..ec8636265e 100644
--- a/pl_bolts/models/rl/per_dqn_model.py
+++ b/pl_bolts/models/rl/per_dqn_model.py
@@ -130,27 +130,17 @@ def training_step(self, batch, _) -> OrderedDict:
         if self.global_step % self.sync_rate == 0:
             self.target_net.load_state_dict(self.net.state_dict())

-        log = {
+        self.log_dict({
             "total_reward": self.total_rewards[-1],
             "avg_reward": self.avg_rewards,
             "train_loss": loss,
             # "episodes": self.total_episode_steps,
-        }
-        status = {
-            "steps": self.global_step,
-            "avg_reward": self.avg_rewards,
-            "total_reward": self.total_rewards[-1],
-            "episodes": self.done_episodes,
-            # "episode_steps": self.episode_steps,
-            "epsilon": self.agent.epsilon,
-        }
+        })

         return OrderedDict(
             {
                 "loss": loss,
                 "avg_reward": self.avg_rewards,
-                "log": log,
-                "progress_bar": status,
             }
         )
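
Note on the logging pattern this patch adopts (illustration, not part of the diff): the deprecated "log" and "progress_bar" keys of the training_step return dict are replaced by PyTorch Lightning's self.log_dict() / self.log() calls. Metrics that should still appear in the progress bar, such as epsilon from the removed status dict, can be restored by logging them with prog_bar=True. The sketch below is a minimal, self-contained example of that pattern; the module, data, metric names, and values are placeholders rather than pl_bolts code.

# Minimal sketch of the self.log / self.log_dict pattern used in this patch.
# The module, data, and metric values are placeholders for illustration only.
from collections import OrderedDict

import torch
from torch import nn
import pytorch_lightning as pl


class ToyModule(pl.LightningModule):
    def __init__(self):
        super().__init__()
        self.net = nn.Linear(4, 2)

    def training_step(self, batch, batch_idx) -> OrderedDict:
        states, targets = batch
        loss = nn.functional.mse_loss(self.net(states), targets)

        # Logger-only metrics: replaces returning them under the "log" key.
        self.log_dict({"train_loss": loss, "avg_reward": 0.0})

        # Progress-bar metric: replaces the removed "progress_bar" return key.
        self.log("epsilon", 0.1, prog_bar=True)

        # Only the loss (and any values needed downstream) are returned now.
        return OrderedDict({"loss": loss})

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=1e-3)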