From a8882266412971ae25e38f370353ead854a8ccbd Mon Sep 17 00:00:00 2001 From: Sina Afrooze Date: Mon, 29 Oct 2018 14:46:40 -0700 Subject: [PATCH] Move embedder, middleware, and head parameters to framework agnostic modules. (#45) Part of #28 --- rl_coach/agents/actor_critic_agent.py | 7 +- rl_coach/agents/bc_agent.py | 6 +- rl_coach/agents/categorical_dqn_agent.py | 2 +- rl_coach/agents/cil_agent.py | 6 +- rl_coach/agents/clipped_ppo_agent.py | 7 +- rl_coach/agents/ddpg_agent.py | 7 +- rl_coach/agents/dfp_agent.py | 7 +- rl_coach/agents/dqn_agent.py | 6 +- rl_coach/agents/human_agent.py | 6 +- rl_coach/agents/n_step_q_agent.py | 6 +- rl_coach/agents/naf_agent.py | 6 +- rl_coach/agents/nec_agent.py | 6 +- rl_coach/agents/policy_gradients_agent.py | 6 +- rl_coach/agents/ppo_agent.py | 7 +- rl_coach/agents/qr_dqn_agent.py | 3 +- rl_coach/agents/rainbow_dqn_agent.py | 4 +- rl_coach/architectures/embedder_parameters.py | 41 +++++ rl_coach/architectures/head_parameters.py | 173 ++++++++++++++++++ .../architectures/middleware_parameters.py | 54 ++++++ .../embedders/__init__.py | 4 + .../embedders/embedder.py | 31 +--- .../tensorflow_components/general_network.py | 30 +-- .../tensorflow_components/heads/__init__.py | 29 +++ .../heads/categorical_q_head.py | 12 +- .../tensorflow_components/heads/cil_head.py | 15 +- .../heads/ddpg_actor_head.py | 13 +- .../tensorflow_components/heads/dnd_q_head.py | 11 -- .../heads/dueling_q_head.py | 10 - .../tensorflow_components/heads/head.py | 15 +- .../heads/measurements_prediction_head.py | 13 +- .../tensorflow_components/heads/naf_head.py | 12 +- .../heads/policy_head.py | 13 +- .../tensorflow_components/heads/ppo_head.py | 12 +- .../tensorflow_components/heads/ppo_v_head.py | 13 +- .../tensorflow_components/heads/q_head.py | 13 +- .../heads/quantile_regression_q_head.py | 13 +- .../heads/rainbow_q_head.py | 12 +- .../tensorflow_components/heads/v_head.py | 13 +- .../middlewares/__init__.py | 4 + .../middlewares/fc_middleware.py | 12 +- .../middlewares/lstm_middleware.py | 13 +- .../middlewares/middleware.py | 17 +- rl_coach/presets/Atari_A3C.py | 2 +- rl_coach/presets/Atari_A3C_LSTM.py | 2 +- rl_coach/presets/Atari_Dueling_DDQN.py | 2 +- .../Atari_Dueling_DDQN_with_PER_OpenAI.py | 2 +- rl_coach/presets/BitFlip_DQN.py | 2 +- rl_coach/presets/BitFlip_DQN_HER.py | 2 +- rl_coach/presets/CARLA_CIL.py | 6 +- rl_coach/presets/CARLA_Dueling_DDQN.py | 2 +- rl_coach/presets/CartPole_Dueling_DDQN.py | 2 +- rl_coach/presets/Doom_Basic_Dueling_DDQN.py | 2 +- .../presets/ExplorationChain_Dueling_DDQN.py | 2 +- rl_coach/presets/Fetch_DDPG_HER_baselines.py | 4 +- rl_coach/presets/Mujoco_A3C_LSTM.py | 4 +- rl_coach/presets/Pendulum_HAC.py | 2 +- .../presets/Starcraft_CollectMinerals_A3C.py | 2 +- .../Starcraft_CollectMinerals_Dueling_DDQN.py | 4 +- tutorials/0. 
Quick Start Guide.ipynb | 2 +- ...Implementing a Hierarchical RL Graph.ipynb | 4 +- 60 files changed, 409 insertions(+), 329 deletions(-) create mode 100644 rl_coach/architectures/embedder_parameters.py create mode 100644 rl_coach/architectures/head_parameters.py create mode 100644 rl_coach/architectures/middleware_parameters.py diff --git a/rl_coach/agents/actor_critic_agent.py b/rl_coach/agents/actor_critic_agent.py index 7c74f1e52..732c7ea8d 100644 --- a/rl_coach/agents/actor_critic_agent.py +++ b/rl_coach/agents/actor_critic_agent.py @@ -20,9 +20,9 @@ import scipy.signal from rl_coach.agents.policy_optimization_agent import PolicyOptimizationAgent, PolicyGradientRescaler -from rl_coach.architectures.tensorflow_components.heads.policy_head import PolicyHeadParameters -from rl_coach.architectures.tensorflow_components.heads.v_head import VHeadParameters -from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters +from rl_coach.architectures.embedder_parameters import InputEmbedderParameters +from rl_coach.architectures.head_parameters import PolicyHeadParameters, VHeadParameters +from rl_coach.architectures.middleware_parameters import FCMiddlewareParameters from rl_coach.base_parameters import AlgorithmParameters, NetworkParameters, \ AgentParameters from rl_coach.exploration_policies.categorical import CategoricalParameters @@ -31,7 +31,6 @@ from rl_coach.memories.episodic.single_episode_buffer import SingleEpisodeBufferParameters from rl_coach.spaces import DiscreteActionSpace, BoxActionSpace from rl_coach.utils import last_sample -from rl_coach.architectures.tensorflow_components.embedders.embedder import InputEmbedderParameters class ActorCriticAlgorithmParameters(AlgorithmParameters): diff --git a/rl_coach/agents/bc_agent.py b/rl_coach/agents/bc_agent.py index 7f8e16663..044dd7102 100644 --- a/rl_coach/agents/bc_agent.py +++ b/rl_coach/agents/bc_agent.py @@ -19,13 +19,13 @@ import numpy as np from rl_coach.agents.imitation_agent import ImitationAgent -from rl_coach.architectures.tensorflow_components.heads.policy_head import PolicyHeadParameters -from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters +from rl_coach.architectures.head_parameters import PolicyHeadParameters +from rl_coach.architectures.middleware_parameters import FCMiddlewareParameters +from rl_coach.architectures.embedder_parameters import InputEmbedderParameters from rl_coach.base_parameters import AgentParameters, AlgorithmParameters, NetworkParameters, \ MiddlewareScheme from rl_coach.exploration_policies.e_greedy import EGreedyParameters from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters -from rl_coach.architectures.tensorflow_components.embedders.embedder import InputEmbedderParameters from rl_coach.memories.non_episodic.experience_replay import ExperienceReplayParameters diff --git a/rl_coach/agents/categorical_dqn_agent.py b/rl_coach/agents/categorical_dqn_agent.py index 5af83c127..24a610bfd 100644 --- a/rl_coach/agents/categorical_dqn_agent.py +++ b/rl_coach/agents/categorical_dqn_agent.py @@ -20,7 +20,7 @@ from rl_coach.agents.dqn_agent import DQNNetworkParameters, DQNAlgorithmParameters, DQNAgentParameters from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent -from rl_coach.architectures.tensorflow_components.heads.categorical_q_head import CategoricalQHeadParameters +from rl_coach.architectures.head_parameters import 
CategoricalQHeadParameters from rl_coach.base_parameters import AgentParameters from rl_coach.core_types import StateType from rl_coach.exploration_policies.e_greedy import EGreedyParameters diff --git a/rl_coach/agents/cil_agent.py b/rl_coach/agents/cil_agent.py index 1940acff7..a0d4af0a3 100644 --- a/rl_coach/agents/cil_agent.py +++ b/rl_coach/agents/cil_agent.py @@ -17,9 +17,9 @@ from typing import Union from rl_coach.agents.imitation_agent import ImitationAgent -from rl_coach.architectures.tensorflow_components.embedders.embedder import InputEmbedderParameters -from rl_coach.architectures.tensorflow_components.heads.cil_head import RegressionHeadParameters -from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters +from rl_coach.architectures.embedder_parameters import InputEmbedderParameters +from rl_coach.architectures.head_parameters import RegressionHeadParameters +from rl_coach.architectures.middleware_parameters import FCMiddlewareParameters from rl_coach.base_parameters import AgentParameters, MiddlewareScheme, NetworkParameters, AlgorithmParameters from rl_coach.exploration_policies.e_greedy import EGreedyParameters from rl_coach.memories.non_episodic.balanced_experience_replay import BalancedExperienceReplayParameters diff --git a/rl_coach/agents/clipped_ppo_agent.py b/rl_coach/agents/clipped_ppo_agent.py index 441496614..080525f5a 100644 --- a/rl_coach/agents/clipped_ppo_agent.py +++ b/rl_coach/agents/clipped_ppo_agent.py @@ -23,12 +23,11 @@ from rl_coach.agents.actor_critic_agent import ActorCriticAgent from rl_coach.agents.policy_optimization_agent import PolicyGradientRescaler -from rl_coach.architectures.tensorflow_components.heads.ppo_head import PPOHeadParameters -from rl_coach.architectures.tensorflow_components.heads.v_head import VHeadParameters -from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters +from rl_coach.architectures.embedder_parameters import InputEmbedderParameters +from rl_coach.architectures.head_parameters import PPOHeadParameters, VHeadParameters +from rl_coach.architectures.middleware_parameters import FCMiddlewareParameters from rl_coach.base_parameters import AlgorithmParameters, NetworkParameters, \ AgentParameters -from rl_coach.architectures.tensorflow_components.embedders.embedder import InputEmbedderParameters from rl_coach.core_types import EnvironmentSteps, Batch, EnvResponse, StateType from rl_coach.exploration_policies.additive_noise import AdditiveNoiseParameters from rl_coach.exploration_policies.categorical import CategoricalParameters diff --git a/rl_coach/agents/ddpg_agent.py b/rl_coach/agents/ddpg_agent.py index 8fa0eb7fa..79d2ef8a3 100644 --- a/rl_coach/agents/ddpg_agent.py +++ b/rl_coach/agents/ddpg_agent.py @@ -22,10 +22,9 @@ from rl_coach.agents.actor_critic_agent import ActorCriticAgent from rl_coach.agents.agent import Agent -from rl_coach.architectures.tensorflow_components.embedders.embedder import InputEmbedderParameters -from rl_coach.architectures.tensorflow_components.heads.ddpg_actor_head import DDPGActorHeadParameters -from rl_coach.architectures.tensorflow_components.heads.v_head import VHeadParameters -from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters +from rl_coach.architectures.embedder_parameters import InputEmbedderParameters +from rl_coach.architectures.head_parameters import DDPGActorHeadParameters, VHeadParameters +from 
rl_coach.architectures.middleware_parameters import FCMiddlewareParameters from rl_coach.base_parameters import NetworkParameters, AlgorithmParameters, \ AgentParameters, EmbedderScheme from rl_coach.core_types import ActionInfo, EnvironmentSteps diff --git a/rl_coach/agents/dfp_agent.py b/rl_coach/agents/dfp_agent.py index f42f948fb..9fed1c389 100644 --- a/rl_coach/agents/dfp_agent.py +++ b/rl_coach/agents/dfp_agent.py @@ -21,14 +21,13 @@ import numpy as np from rl_coach.agents.agent import Agent +from rl_coach.architectures.head_parameters import MeasurementsPredictionHeadParameters +from rl_coach.architectures.embedder_parameters import InputEmbedderParameters +from rl_coach.architectures.middleware_parameters import FCMiddlewareParameters from rl_coach.architectures.tensorflow_components.layers import Conv2d, Dense -from rl_coach.architectures.tensorflow_components.heads.measurements_prediction_head import \ - MeasurementsPredictionHeadParameters -from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters from rl_coach.base_parameters import AlgorithmParameters, AgentParameters, NetworkParameters, \ MiddlewareScheme from rl_coach.core_types import ActionInfo, EnvironmentSteps, RunPhase -from rl_coach.architectures.tensorflow_components.embedders.embedder import InputEmbedderParameters from rl_coach.exploration_policies.e_greedy import EGreedyParameters from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplayParameters from rl_coach.memories.memory import MemoryGranularity diff --git a/rl_coach/agents/dqn_agent.py b/rl_coach/agents/dqn_agent.py index f261a08cd..a60aac22c 100644 --- a/rl_coach/agents/dqn_agent.py +++ b/rl_coach/agents/dqn_agent.py @@ -19,11 +19,11 @@ import numpy as np from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent -from rl_coach.architectures.tensorflow_components.heads.q_head import QHeadParameters -from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters +from rl_coach.architectures.embedder_parameters import InputEmbedderParameters +from rl_coach.architectures.head_parameters import QHeadParameters +from rl_coach.architectures.middleware_parameters import FCMiddlewareParameters from rl_coach.base_parameters import AlgorithmParameters, NetworkParameters, AgentParameters, \ MiddlewareScheme -from rl_coach.architectures.tensorflow_components.embedders.embedder import InputEmbedderParameters from rl_coach.core_types import EnvironmentSteps from rl_coach.exploration_policies.e_greedy import EGreedyParameters from rl_coach.memories.non_episodic.experience_replay import ExperienceReplayParameters diff --git a/rl_coach/agents/human_agent.py b/rl_coach/agents/human_agent.py index e55340301..2b920585d 100644 --- a/rl_coach/agents/human_agent.py +++ b/rl_coach/agents/human_agent.py @@ -23,11 +23,11 @@ from rl_coach.agents.agent import Agent from rl_coach.agents.bc_agent import BCNetworkParameters -from rl_coach.architectures.tensorflow_components.heads.policy_head import PolicyHeadParameters -from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters +from rl_coach.architectures.embedder_parameters import InputEmbedderParameters +from rl_coach.architectures.head_parameters import PolicyHeadParameters +from rl_coach.architectures.middleware_parameters import FCMiddlewareParameters from rl_coach.base_parameters import AlgorithmParameters, NetworkParameters, EmbedderScheme, \ 
AgentParameters -from rl_coach.architectures.tensorflow_components.embedders.embedder import InputEmbedderParameters from rl_coach.core_types import ActionInfo from rl_coach.exploration_policies.e_greedy import EGreedyParameters from rl_coach.logger import screen diff --git a/rl_coach/agents/n_step_q_agent.py b/rl_coach/agents/n_step_q_agent.py index 787544b32..1f013147a 100644 --- a/rl_coach/agents/n_step_q_agent.py +++ b/rl_coach/agents/n_step_q_agent.py @@ -20,10 +20,10 @@ from rl_coach.agents.policy_optimization_agent import PolicyOptimizationAgent from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent -from rl_coach.architectures.tensorflow_components.heads.q_head import QHeadParameters -from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters +from rl_coach.architectures.embedder_parameters import InputEmbedderParameters +from rl_coach.architectures.head_parameters import QHeadParameters +from rl_coach.architectures.middleware_parameters import FCMiddlewareParameters from rl_coach.base_parameters import AlgorithmParameters, AgentParameters, NetworkParameters -from rl_coach.architectures.tensorflow_components.embedders.embedder import InputEmbedderParameters from rl_coach.core_types import EnvironmentSteps from rl_coach.exploration_policies.e_greedy import EGreedyParameters diff --git a/rl_coach/agents/naf_agent.py b/rl_coach/agents/naf_agent.py index 0401ba50b..df7c60d46 100644 --- a/rl_coach/agents/naf_agent.py +++ b/rl_coach/agents/naf_agent.py @@ -19,11 +19,11 @@ import numpy as np from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent -from rl_coach.architectures.tensorflow_components.heads.naf_head import NAFHeadParameters -from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters +from rl_coach.architectures.embedder_parameters import InputEmbedderParameters +from rl_coach.architectures.head_parameters import NAFHeadParameters +from rl_coach.architectures.middleware_parameters import FCMiddlewareParameters from rl_coach.base_parameters import AlgorithmParameters, AgentParameters, \ NetworkParameters -from rl_coach.architectures.tensorflow_components.embedders.embedder import InputEmbedderParameters from rl_coach.core_types import ActionInfo, EnvironmentSteps from rl_coach.exploration_policies.ou_process import OUProcessParameters diff --git a/rl_coach/agents/nec_agent.py b/rl_coach/agents/nec_agent.py index 891466d6e..6f168bbc7 100644 --- a/rl_coach/agents/nec_agent.py +++ b/rl_coach/agents/nec_agent.py @@ -21,10 +21,10 @@ import numpy as np from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent -from rl_coach.architectures.tensorflow_components.heads.dnd_q_head import DNDQHeadParameters -from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters +from rl_coach.architectures.embedder_parameters import InputEmbedderParameters +from rl_coach.architectures.head_parameters import DNDQHeadParameters +from rl_coach.architectures.middleware_parameters import FCMiddlewareParameters from rl_coach.base_parameters import AlgorithmParameters, NetworkParameters, AgentParameters -from rl_coach.architectures.tensorflow_components.embedders.embedder import InputEmbedderParameters from rl_coach.core_types import RunPhase, EnvironmentSteps, Episode, StateType from rl_coach.exploration_policies.e_greedy import EGreedyParameters diff --git a/rl_coach/agents/policy_gradients_agent.py 
b/rl_coach/agents/policy_gradients_agent.py index 177da8dc2..7db5fd886 100644 --- a/rl_coach/agents/policy_gradients_agent.py +++ b/rl_coach/agents/policy_gradients_agent.py @@ -19,11 +19,11 @@ import numpy as np from rl_coach.agents.policy_optimization_agent import PolicyOptimizationAgent, PolicyGradientRescaler -from rl_coach.architectures.tensorflow_components.heads.policy_head import PolicyHeadParameters -from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters +from rl_coach.architectures.embedder_parameters import InputEmbedderParameters +from rl_coach.architectures.head_parameters import PolicyHeadParameters +from rl_coach.architectures.middleware_parameters import FCMiddlewareParameters from rl_coach.base_parameters import NetworkParameters, AlgorithmParameters, \ AgentParameters -from rl_coach.architectures.tensorflow_components.embedders.embedder import InputEmbedderParameters from rl_coach.exploration_policies.additive_noise import AdditiveNoiseParameters from rl_coach.exploration_policies.categorical import CategoricalParameters diff --git a/rl_coach/agents/ppo_agent.py b/rl_coach/agents/ppo_agent.py index fdb175ea9..83b6fc40d 100644 --- a/rl_coach/agents/ppo_agent.py +++ b/rl_coach/agents/ppo_agent.py @@ -22,12 +22,11 @@ from rl_coach.agents.actor_critic_agent import ActorCriticAgent from rl_coach.agents.policy_optimization_agent import PolicyGradientRescaler -from rl_coach.architectures.tensorflow_components.heads.ppo_head import PPOHeadParameters -from rl_coach.architectures.tensorflow_components.heads.v_head import VHeadParameters -from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters +from rl_coach.architectures.embedder_parameters import InputEmbedderParameters +from rl_coach.architectures.head_parameters import PPOHeadParameters, VHeadParameters +from rl_coach.architectures.middleware_parameters import FCMiddlewareParameters from rl_coach.base_parameters import AlgorithmParameters, NetworkParameters, \ AgentParameters, DistributedTaskParameters -from rl_coach.architectures.tensorflow_components.embedders.embedder import InputEmbedderParameters from rl_coach.core_types import EnvironmentSteps, Batch from rl_coach.exploration_policies.additive_noise import AdditiveNoiseParameters diff --git a/rl_coach/agents/qr_dqn_agent.py b/rl_coach/agents/qr_dqn_agent.py index 479cd8078..ac525ea97 100644 --- a/rl_coach/agents/qr_dqn_agent.py +++ b/rl_coach/agents/qr_dqn_agent.py @@ -20,8 +20,7 @@ from rl_coach.agents.dqn_agent import DQNAgentParameters, DQNNetworkParameters, DQNAlgorithmParameters from rl_coach.agents.value_optimization_agent import ValueOptimizationAgent -from rl_coach.architectures.tensorflow_components.heads.quantile_regression_q_head import \ - QuantileRegressionQHeadParameters +from rl_coach.architectures.head_parameters import QuantileRegressionQHeadParameters from rl_coach.core_types import StateType from rl_coach.schedules import LinearSchedule diff --git a/rl_coach/agents/rainbow_dqn_agent.py b/rl_coach/agents/rainbow_dqn_agent.py index e39024aea..609ea0b4a 100644 --- a/rl_coach/agents/rainbow_dqn_agent.py +++ b/rl_coach/agents/rainbow_dqn_agent.py @@ -21,8 +21,8 @@ from rl_coach.agents.categorical_dqn_agent import CategoricalDQNAlgorithmParameters, \ CategoricalDQNAgent, CategoricalDQNAgentParameters from rl_coach.agents.dqn_agent import DQNNetworkParameters -from rl_coach.architectures.tensorflow_components.heads.rainbow_q_head import RainbowQHeadParameters 
-from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters +from rl_coach.architectures.head_parameters import RainbowQHeadParameters +from rl_coach.architectures.middleware_parameters import FCMiddlewareParameters from rl_coach.base_parameters import MiddlewareScheme from rl_coach.exploration_policies.parameter_noise import ParameterNoiseParameters from rl_coach.memories.non_episodic.prioritized_experience_replay import PrioritizedExperienceReplayParameters, \ diff --git a/rl_coach/architectures/embedder_parameters.py b/rl_coach/architectures/embedder_parameters.py new file mode 100644 index 000000000..2731a5272 --- /dev/null +++ b/rl_coach/architectures/embedder_parameters.py @@ -0,0 +1,41 @@ +# +# Copyright (c) 2017 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from typing import List, Union + +from rl_coach.base_parameters import EmbedderScheme, NetworkComponentParameters + + +class InputEmbedderParameters(NetworkComponentParameters): + def __init__(self, activation_function: str='relu', scheme: Union[List, EmbedderScheme]=EmbedderScheme.Medium, + batchnorm: bool=False, dropout=False, name: str='embedder', input_rescaling=None, input_offset=None, + input_clipping=None, dense_layer=None, is_training=False): + super().__init__(dense_layer=dense_layer) + self.activation_function = activation_function + self.scheme = scheme + self.batchnorm = batchnorm + self.dropout = dropout + + if input_rescaling is None: + input_rescaling = {'image': 255.0, 'vector': 1.0} + if input_offset is None: + input_offset = {'image': 0.0, 'vector': 0.0} + + self.input_rescaling = input_rescaling + self.input_offset = input_offset + self.input_clipping = input_clipping + self.name = name + self.is_training = is_training diff --git a/rl_coach/architectures/head_parameters.py b/rl_coach/architectures/head_parameters.py new file mode 100644 index 000000000..e29d656f1 --- /dev/null +++ b/rl_coach/architectures/head_parameters.py @@ -0,0 +1,173 @@ +# +# Copyright (c) 2017 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from typing import Type + +from rl_coach.base_parameters import NetworkComponentParameters + + +class HeadParameters(NetworkComponentParameters): + def __init__(self, parameterized_class_name: str, activation_function: str = 'relu', name: str= 'head', + num_output_head_copies: int=1, rescale_gradient_from_head_by_factor: float=1.0, + loss_weight: float=1.0, dense_layer=None): + super().__init__(dense_layer=dense_layer) + self.activation_function = activation_function + self.name = name + self.num_output_head_copies = num_output_head_copies + self.rescale_gradient_from_head_by_factor = rescale_gradient_from_head_by_factor + self.loss_weight = loss_weight + self.parameterized_class_name = parameterized_class_name + + +class PPOHeadParameters(HeadParameters): + def __init__(self, activation_function: str ='tanh', name: str='ppo_head_params', + num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0, + loss_weight: float = 1.0, dense_layer=None): + super().__init__(parameterized_class_name="PPOHead", activation_function=activation_function, name=name, + dense_layer=dense_layer, num_output_head_copies=num_output_head_copies, + rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor, + loss_weight=loss_weight) + + +class VHeadParameters(HeadParameters): + def __init__(self, activation_function: str ='relu', name: str='v_head_params', + num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0, + loss_weight: float = 1.0, dense_layer=None): + super().__init__(parameterized_class_name="VHead", activation_function=activation_function, name=name, + dense_layer=dense_layer, num_output_head_copies=num_output_head_copies, + rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor, + loss_weight=loss_weight) + + +class CategoricalQHeadParameters(HeadParameters): + def __init__(self, activation_function: str ='relu', name: str='categorical_q_head_params', + num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0, + loss_weight: float = 1.0, dense_layer=None): + super().__init__(parameterized_class_name="CategoricalQHead", activation_function=activation_function, name=name, + dense_layer=dense_layer, num_output_head_copies=num_output_head_copies, + rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor, + loss_weight=loss_weight) + + +class RegressionHeadParameters(HeadParameters): + def __init__(self, activation_function: str ='relu', name: str='q_head_params', + num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0, + loss_weight: float = 1.0, dense_layer=None, scheme=None): + super().__init__(parameterized_class_name="RegressionHead", activation_function=activation_function, name=name, + dense_layer=dense_layer, num_output_head_copies=num_output_head_copies, + rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor, + loss_weight=loss_weight) + + +class DDPGActorHeadParameters(HeadParameters): + def __init__(self, activation_function: str ='tanh', name: str='policy_head_params', batchnorm: bool=True, + num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0, + loss_weight: float = 1.0, dense_layer=None): + super().__init__(parameterized_class_name="DDPGActor", activation_function=activation_function, name=name, + dense_layer=dense_layer, num_output_head_copies=num_output_head_copies, + rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor, + loss_weight=loss_weight) + 
self.batchnorm = batchnorm + + +class DNDQHeadParameters(HeadParameters): + def __init__(self, activation_function: str ='relu', name: str='dnd_q_head_params', + num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0, + loss_weight: float = 1.0, dense_layer=None): + super().__init__(parameterized_class_name="DNDQHead", activation_function=activation_function, name=name, + dense_layer=dense_layer, num_output_head_copies=num_output_head_copies, + rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor, + loss_weight=loss_weight) + + +class DuelingQHeadParameters(HeadParameters): + def __init__(self, activation_function: str ='relu', name: str='dueling_q_head_params', + num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0, + loss_weight: float = 1.0, dense_layer=None): + super().__init__(parameterized_class_name="DuelingQHead", activation_function=activation_function, name=name, + dense_layer=dense_layer, num_output_head_copies=num_output_head_copies, + rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor, + loss_weight=loss_weight) + + +class MeasurementsPredictionHeadParameters(HeadParameters): + def __init__(self, activation_function: str ='relu', name: str='measurements_prediction_head_params', + num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0, + loss_weight: float = 1.0, dense_layer=None): + super().__init__(parameterized_class_name="MeasurementsPredictionHead", activation_function=activation_function, name=name, + dense_layer=dense_layer, num_output_head_copies=num_output_head_copies, + rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor, + loss_weight=loss_weight) + + +class NAFHeadParameters(HeadParameters): + def __init__(self, activation_function: str ='tanh', name: str='naf_head_params', + num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0, + loss_weight: float = 1.0, dense_layer=None): + super().__init__(parameterized_class_name="NAFHead", activation_function=activation_function, name=name, + dense_layer=dense_layer, num_output_head_copies=num_output_head_copies, + rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor, + loss_weight=loss_weight) + + +class PolicyHeadParameters(HeadParameters): + def __init__(self, activation_function: str ='tanh', name: str='policy_head_params', + num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0, + loss_weight: float = 1.0, dense_layer=None): + super().__init__(parameterized_class_name="PolicyHead", activation_function=activation_function, name=name, + dense_layer=dense_layer, num_output_head_copies=num_output_head_copies, + rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor, + loss_weight=loss_weight) + + +class PPOVHeadParameters(HeadParameters): + def __init__(self, activation_function: str ='relu', name: str='ppo_v_head_params', + num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0, + loss_weight: float = 1.0, dense_layer=None): + super().__init__(parameterized_class_name="PPOVHead", activation_function=activation_function, name=name, + dense_layer=dense_layer, num_output_head_copies=num_output_head_copies, + rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor, + loss_weight=loss_weight) + + +class QHeadParameters(HeadParameters): + def __init__(self, activation_function: str ='relu', name: str='q_head_params', + 
num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0, + loss_weight: float = 1.0, dense_layer=None): + super().__init__(parameterized_class_name="QHead", activation_function=activation_function, name=name, + dense_layer=dense_layer, num_output_head_copies=num_output_head_copies, + rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor, + loss_weight=loss_weight) + + +class QuantileRegressionQHeadParameters(HeadParameters): + def __init__(self, activation_function: str ='relu', name: str='quantile_regression_q_head_params', + num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0, + loss_weight: float = 1.0, dense_layer=None): + super().__init__(parameterized_class_name="QuantileRegressionQHead", activation_function=activation_function, name=name, + dense_layer=dense_layer, num_output_head_copies=num_output_head_copies, + rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor, + loss_weight=loss_weight) + + +class RainbowQHeadParameters(HeadParameters): + def __init__(self, activation_function: str ='relu', name: str='rainbow_q_head_params', + num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0, + loss_weight: float = 1.0, dense_layer=None): + super().__init__(parameterized_class_name="RainbowQHead", activation_function=activation_function, name=name, + dense_layer=dense_layer, num_output_head_copies=num_output_head_copies, + rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor, + loss_weight=loss_weight) diff --git a/rl_coach/architectures/middleware_parameters.py b/rl_coach/architectures/middleware_parameters.py new file mode 100644 index 000000000..711ec069d --- /dev/null +++ b/rl_coach/architectures/middleware_parameters.py @@ -0,0 +1,54 @@ +# +# Copyright (c) 2017 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from typing import List, Type, Union + +from rl_coach.base_parameters import MiddlewareScheme, NetworkComponentParameters + + +class MiddlewareParameters(NetworkComponentParameters): + def __init__(self, parameterized_class_name: str, + activation_function: str='relu', scheme: Union[List, MiddlewareScheme]=MiddlewareScheme.Medium, + batchnorm: bool=False, dropout: bool=False, name='middleware', dense_layer=None, is_training=False): + super().__init__(dense_layer=dense_layer) + self.activation_function = activation_function + self.scheme = scheme + self.batchnorm = batchnorm + self.dropout = dropout + self.name = name + self.is_training = is_training + self.parameterized_class_name = parameterized_class_name + + +class FCMiddlewareParameters(MiddlewareParameters): + def __init__(self, activation_function='relu', + scheme: Union[List, MiddlewareScheme] = MiddlewareScheme.Medium, + batchnorm: bool = False, dropout: bool = False, + name="middleware_fc_embedder", dense_layer=None, is_training=False): + super().__init__(parameterized_class_name="FCMiddleware", activation_function=activation_function, + scheme=scheme, batchnorm=batchnorm, dropout=dropout, name=name, dense_layer=dense_layer, + is_training=is_training) + + +class LSTMMiddlewareParameters(MiddlewareParameters): + def __init__(self, activation_function='relu', number_of_lstm_cells=256, + scheme: MiddlewareScheme = MiddlewareScheme.Medium, + batchnorm: bool = False, dropout: bool = False, + name="middleware_lstm_embedder", dense_layer=None, is_training=False): + super().__init__(parameterized_class_name="LSTMMiddleware", activation_function=activation_function, + scheme=scheme, batchnorm=batchnorm, dropout=dropout, name=name, dense_layer=dense_layer, + is_training=is_training) + self.number_of_lstm_cells = number_of_lstm_cells \ No newline at end of file diff --git a/rl_coach/architectures/tensorflow_components/embedders/__init__.py b/rl_coach/architectures/tensorflow_components/embedders/__init__.py index e69de29bb..eb0482f13 100644 --- a/rl_coach/architectures/tensorflow_components/embedders/__init__.py +++ b/rl_coach/architectures/tensorflow_components/embedders/__init__.py @@ -0,0 +1,4 @@ +from .image_embedder import ImageEmbedder +from .vector_embedder import VectorEmbedder + +__all__ = ['ImageEmbedder', 'VectorEmbedder'] diff --git a/rl_coach/architectures/tensorflow_components/embedders/embedder.py b/rl_coach/architectures/tensorflow_components/embedders/embedder.py index 66b81ef58..004c5c4a7 100644 --- a/rl_coach/architectures/tensorflow_components/embedders/embedder.py +++ b/rl_coach/architectures/tensorflow_components/embedders/embedder.py @@ -28,35 +28,6 @@ from rl_coach.utils import force_list -class InputEmbedderParameters(NetworkComponentParameters): - def __init__(self, activation_function: str='relu', scheme: Union[List, EmbedderScheme]=EmbedderScheme.Medium, - batchnorm: bool=False, dropout=False, name: str='embedder', input_rescaling=None, input_offset=None, - input_clipping=None, dense_layer=Dense, is_training=False): - super().__init__(dense_layer=dense_layer) - self.activation_function = activation_function - self.scheme = scheme - self.batchnorm = batchnorm - self.dropout = dropout - - if input_rescaling is None: - input_rescaling = {'image': 255.0, 'vector': 1.0} - if input_offset is None: - input_offset = {'image': 0.0, 'vector': 0.0} - - self.input_rescaling = input_rescaling - self.input_offset = input_offset - self.input_clipping = input_clipping - self.name = name - self.is_training = is_training - - 
@property - def path(self): - return { - "image": 'image_embedder:ImageEmbedder', - "vector": 'vector_embedder:VectorEmbedder' - } - - class InputEmbedder(object): """ An input embedder is the first part of the network, which takes the input from the state and produces a vector @@ -83,6 +54,8 @@ def __init__(self, input_size: List[int], activation_function=tf.nn.relu, self.input_offset = input_offset self.input_clipping = input_clipping self.dense_layer = dense_layer + if self.dense_layer is None: + self.dense_layer = Dense self.is_training = is_training # layers order is conv -> batchnorm -> activation -> dropout diff --git a/rl_coach/architectures/tensorflow_components/general_network.py b/rl_coach/architectures/tensorflow_components/general_network.py index 087293100..fa494b572 100644 --- a/rl_coach/architectures/tensorflow_components/general_network.py +++ b/rl_coach/architectures/tensorflow_components/general_network.py @@ -20,10 +20,10 @@ import numpy as np import tensorflow as tf -from rl_coach.architectures.tensorflow_components.embedders.embedder import InputEmbedderParameters +from rl_coach.architectures.embedder_parameters import InputEmbedderParameters +from rl_coach.architectures.head_parameters import HeadParameters +from rl_coach.architectures.middleware_parameters import MiddlewareParameters from rl_coach.architectures.tensorflow_components.architecture import TensorFlowArchitecture -from rl_coach.architectures.tensorflow_components.heads.head import HeadParameters -from rl_coach.architectures.tensorflow_components.middlewares.middleware import MiddlewareParameters from rl_coach.base_parameters import AgentParameters, EmbeddingMergerType from rl_coach.core_types import PredictionType from rl_coach.spaces import SpacesDefinition, PlanarMapsObservationSpace @@ -136,15 +136,17 @@ def get_input_embedder(self, input_name: str, embedder_params: InputEmbedderPara raise ValueError("The key for the input embedder ({}) must match one of the following keys: {}" .format(input_name, allowed_inputs.keys())) - type = "vector" + mod_names = {'image': 'ImageEmbedder', 'vector': 'VectorEmbedder'} + + emb_type = "vector" if isinstance(allowed_inputs[input_name], PlanarMapsObservationSpace): - type = "image" + emb_type = "image" - embedder_path = 'rl_coach.architectures.tensorflow_components.embedders.' 
+ embedder_params.path[type] + embedder_path = 'rl_coach.architectures.tensorflow_components.embedders:' + mod_names[emb_type] embedder_params_copy = copy.copy(embedder_params) embedder_params_copy.activation_function = self.get_activation_function(embedder_params.activation_function) - embedder_params_copy.input_rescaling = embedder_params_copy.input_rescaling[type] - embedder_params_copy.input_offset = embedder_params_copy.input_offset[type] + embedder_params_copy.input_rescaling = embedder_params_copy.input_rescaling[emb_type] + embedder_params_copy.input_offset = embedder_params_copy.input_offset[emb_type] embedder_params_copy.name = input_name module = dynamic_import_and_instantiate_module_from_params(embedder_params_copy, path=embedder_path, @@ -157,25 +159,25 @@ def get_middleware(self, middleware_params: MiddlewareParameters): :param middleware_params: the paramaeters of the middleware class :return: the middleware instance """ + mod_name = middleware_params.parameterized_class_name + middleware_path = 'rl_coach.architectures.tensorflow_components.middlewares:' + mod_name middleware_params_copy = copy.copy(middleware_params) middleware_params_copy.activation_function = self.get_activation_function(middleware_params.activation_function) - module = dynamic_import_and_instantiate_module_from_params(middleware_params_copy) + module = dynamic_import_and_instantiate_module_from_params(middleware_params_copy, path=middleware_path) return module def get_output_head(self, head_params: HeadParameters, head_idx: int): """ Given a head type, creates the head and returns it :param head_params: the parameters of the head to create - :param head_type: the path to the class of the head under the embedders directory or a full path to a head class. - the path should be in the following structure: : :param head_idx: the head index - :param loss_weight: the weight to assign for the embedders loss :return: the head """ - + mod_name = head_params.parameterized_class_name + head_path = 'rl_coach.architectures.tensorflow_components.heads:' + mod_name head_params_copy = copy.copy(head_params) head_params_copy.activation_function = self.get_activation_function(head_params_copy.activation_function) - return dynamic_import_and_instantiate_module_from_params(head_params_copy, extra_kwargs={ + return dynamic_import_and_instantiate_module_from_params(head_params_copy, path=head_path, extra_kwargs={ 'agent_parameters': self.ap, 'spaces': self.spaces, 'network_name': self.network_wrapper_name, 'head_idx': head_idx, 'is_local': self.network_is_local}) diff --git a/rl_coach/architectures/tensorflow_components/heads/__init__.py b/rl_coach/architectures/tensorflow_components/heads/__init__.py index e69de29bb..7e642349a 100644 --- a/rl_coach/architectures/tensorflow_components/heads/__init__.py +++ b/rl_coach/architectures/tensorflow_components/heads/__init__.py @@ -0,0 +1,29 @@ +from .categorical_q_head import CategoricalQHead +from .ddpg_actor_head import DDPGActor +from .dnd_q_head import DNDQHead +from .dueling_q_head import DuelingQHead +from .measurements_prediction_head import MeasurementsPredictionHead +from .naf_head import NAFHead +from .policy_head import PolicyHead +from .ppo_head import PPOHead +from .ppo_v_head import PPOVHead +from .q_head import QHead +from .quantile_regression_q_head import QuantileRegressionQHead +from .rainbow_q_head import RainbowQHead +from .v_head import VHead + +__all__ = [ + 'CategoricalQHead', + 'DDPGActor', + 'DNDQHead', + 'DuelingQHead', + 'MeasurementsPredictionHead', + 
'NAFHead', + 'PolicyHead', + 'PPOHead', + 'PPOVHead', + 'QHead', + 'QuantileRegressionQHead', + 'RainbowQHead', + 'VHead' +] diff --git a/rl_coach/architectures/tensorflow_components/heads/categorical_q_head.py b/rl_coach/architectures/tensorflow_components/heads/categorical_q_head.py index 6f60a05a7..1f19a59c5 100644 --- a/rl_coach/architectures/tensorflow_components/heads/categorical_q_head.py +++ b/rl_coach/architectures/tensorflow_components/heads/categorical_q_head.py @@ -18,22 +18,12 @@ from rl_coach.architectures.tensorflow_components.layers import Dense -from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters +from rl_coach.architectures.tensorflow_components.heads.head import Head from rl_coach.base_parameters import AgentParameters from rl_coach.core_types import QActionStateValue from rl_coach.spaces import SpacesDefinition -class CategoricalQHeadParameters(HeadParameters): - def __init__(self, activation_function: str ='relu', name: str='categorical_q_head_params', - num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0, - loss_weight: float = 1.0, dense_layer=Dense): - super().__init__(parameterized_class=CategoricalQHead, activation_function=activation_function, name=name, - dense_layer=dense_layer, num_output_head_copies=num_output_head_copies, - rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor, - loss_weight=loss_weight) - - class CategoricalQHead(Head): def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str, head_idx: int = 0, loss_weight: float = 1., is_local: bool = True, activation_function: str ='relu', diff --git a/rl_coach/architectures/tensorflow_components/heads/cil_head.py b/rl_coach/architectures/tensorflow_components/heads/cil_head.py index 27bb0af13..15f9de12d 100644 --- a/rl_coach/architectures/tensorflow_components/heads/cil_head.py +++ b/rl_coach/architectures/tensorflow_components/heads/cil_head.py @@ -16,25 +16,14 @@ import tensorflow as tf -from rl_coach.architectures.tensorflow_components.layers import Dense, batchnorm_activation_dropout - -from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters +from rl_coach.architectures.tensorflow_components.layers import Dense +from rl_coach.architectures.tensorflow_components.heads.head import Head from rl_coach.base_parameters import AgentParameters from rl_coach.core_types import QActionStateValue from rl_coach.spaces import SpacesDefinition, BoxActionSpace, DiscreteActionSpace from rl_coach.utils import force_list -class RegressionHeadParameters(HeadParameters): - def __init__(self, activation_function: str ='relu', name: str='q_head_params', - num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0, - loss_weight: float = 1.0, dense_layer=Dense, scheme=[Dense(256), Dense(256)]): - super().__init__(parameterized_class=RegressionHead, activation_function=activation_function, name=name, - dense_layer=dense_layer, num_output_head_copies=num_output_head_copies, - rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor, - loss_weight=loss_weight) - - class RegressionHead(Head): def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str, head_idx: int = 0, loss_weight: float = 1., is_local: bool = True, activation_function: str='relu', diff --git a/rl_coach/architectures/tensorflow_components/heads/ddpg_actor_head.py 
b/rl_coach/architectures/tensorflow_components/heads/ddpg_actor_head.py index 58011c667..6b3112aef 100644 --- a/rl_coach/architectures/tensorflow_components/heads/ddpg_actor_head.py +++ b/rl_coach/architectures/tensorflow_components/heads/ddpg_actor_head.py @@ -17,23 +17,12 @@ import tensorflow as tf from rl_coach.architectures.tensorflow_components.layers import batchnorm_activation_dropout, Dense -from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters +from rl_coach.architectures.tensorflow_components.heads.head import Head from rl_coach.base_parameters import AgentParameters from rl_coach.core_types import ActionProbabilities from rl_coach.spaces import SpacesDefinition -class DDPGActorHeadParameters(HeadParameters): - def __init__(self, activation_function: str ='tanh', name: str='policy_head_params', batchnorm: bool=True, - num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0, - loss_weight: float = 1.0, dense_layer=Dense): - super().__init__(parameterized_class=DDPGActor, activation_function=activation_function, name=name, - dense_layer=dense_layer, num_output_head_copies=num_output_head_copies, - rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor, - loss_weight=loss_weight) - self.batchnorm = batchnorm - - class DDPGActor(Head): def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str, head_idx: int = 0, loss_weight: float = 1., is_local: bool = True, activation_function: str='tanh', diff --git a/rl_coach/architectures/tensorflow_components/heads/dnd_q_head.py b/rl_coach/architectures/tensorflow_components/heads/dnd_q_head.py index a8801384c..5c45146e1 100644 --- a/rl_coach/architectures/tensorflow_components/heads/dnd_q_head.py +++ b/rl_coach/architectures/tensorflow_components/heads/dnd_q_head.py @@ -16,23 +16,12 @@ import tensorflow as tf from rl_coach.architectures.tensorflow_components.layers import Dense -from rl_coach.architectures.tensorflow_components.heads.head import HeadParameters from rl_coach.architectures.tensorflow_components.heads.q_head import QHead from rl_coach.base_parameters import AgentParameters from rl_coach.memories.non_episodic import differentiable_neural_dictionary from rl_coach.spaces import SpacesDefinition -class DNDQHeadParameters(HeadParameters): - def __init__(self, activation_function: str ='relu', name: str='dnd_q_head_params', - num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0, - loss_weight: float = 1.0, dense_layer=Dense): - super().__init__(parameterized_class=DNDQHead, activation_function=activation_function, name=name, - dense_layer=dense_layer, num_output_head_copies=num_output_head_copies, - rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor, - loss_weight=loss_weight) - - class DNDQHead(QHead): def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str, head_idx: int = 0, loss_weight: float = 1., is_local: bool = True, activation_function: str='relu', diff --git a/rl_coach/architectures/tensorflow_components/heads/dueling_q_head.py b/rl_coach/architectures/tensorflow_components/heads/dueling_q_head.py index 05f730e1f..8237a91ba 100644 --- a/rl_coach/architectures/tensorflow_components/heads/dueling_q_head.py +++ b/rl_coach/architectures/tensorflow_components/heads/dueling_q_head.py @@ -17,21 +17,11 @@ import tensorflow as tf from rl_coach.architectures.tensorflow_components.layers import Dense -from 
rl_coach.architectures.tensorflow_components.heads.head import HeadParameters from rl_coach.architectures.tensorflow_components.heads.q_head import QHead from rl_coach.base_parameters import AgentParameters from rl_coach.spaces import SpacesDefinition -class DuelingQHeadParameters(HeadParameters): - def __init__(self, activation_function: str ='relu', name: str='dueling_q_head_params', - num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0, - loss_weight: float = 1.0, dense_layer=Dense): - super().__init__(parameterized_class=DuelingQHead, activation_function=activation_function, name=name, - dense_layer=dense_layer, num_output_head_copies=num_output_head_copies, - rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor, - loss_weight=loss_weight) - class DuelingQHead(QHead): def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str, head_idx: int = 0, loss_weight: float = 1., is_local: bool = True, activation_function: str='relu', diff --git a/rl_coach/architectures/tensorflow_components/heads/head.py b/rl_coach/architectures/tensorflow_components/heads/head.py index 7439f9a86..956bd5cb3 100644 --- a/rl_coach/architectures/tensorflow_components/heads/head.py +++ b/rl_coach/architectures/tensorflow_components/heads/head.py @@ -33,19 +33,6 @@ def _initializer(shape, dtype=None, partition_info=None): return _initializer -class HeadParameters(NetworkComponentParameters): - def __init__(self, parameterized_class: Type['Head'], activation_function: str = 'relu', name: str= 'head', - num_output_head_copies: int=1, rescale_gradient_from_head_by_factor: float=1.0, - loss_weight: float=1.0, dense_layer=Dense): - super().__init__(dense_layer=dense_layer) - self.activation_function = activation_function - self.name = name - self.num_output_head_copies = num_output_head_copies - self.rescale_gradient_from_head_by_factor = rescale_gradient_from_head_by_factor - self.loss_weight = loss_weight - self.parameterized_class_name = parameterized_class.__name__ - - class Head(object): """ A head is the final part of the network. 
It takes the embedding from the middleware embedder and passes it through @@ -74,6 +61,8 @@ def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, self.return_type = None self.activation_function = activation_function self.dense_layer = dense_layer + if self.dense_layer is None: + self.dense_layer = Dense def __call__(self, input_layer): """ diff --git a/rl_coach/architectures/tensorflow_components/heads/measurements_prediction_head.py b/rl_coach/architectures/tensorflow_components/heads/measurements_prediction_head.py index f1172bcd8..647abc3c7 100644 --- a/rl_coach/architectures/tensorflow_components/heads/measurements_prediction_head.py +++ b/rl_coach/architectures/tensorflow_components/heads/measurements_prediction_head.py @@ -17,23 +17,12 @@ import tensorflow as tf from rl_coach.architectures.tensorflow_components.layers import Dense - -from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters +from rl_coach.architectures.tensorflow_components.heads.head import Head from rl_coach.base_parameters import AgentParameters from rl_coach.core_types import Measurements from rl_coach.spaces import SpacesDefinition -class MeasurementsPredictionHeadParameters(HeadParameters): - def __init__(self, activation_function: str ='relu', name: str='measurements_prediction_head_params', - num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0, - loss_weight: float = 1.0, dense_layer=Dense): - super().__init__(parameterized_class=MeasurementsPredictionHead, activation_function=activation_function, name=name, - dense_layer=dense_layer, num_output_head_copies=num_output_head_copies, - rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor, - loss_weight=loss_weight) - - class MeasurementsPredictionHead(Head): def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str, head_idx: int = 0, loss_weight: float = 1., is_local: bool = True, activation_function: str='relu', diff --git a/rl_coach/architectures/tensorflow_components/heads/naf_head.py b/rl_coach/architectures/tensorflow_components/heads/naf_head.py index c2768cce6..9071fed76 100644 --- a/rl_coach/architectures/tensorflow_components/heads/naf_head.py +++ b/rl_coach/architectures/tensorflow_components/heads/naf_head.py @@ -17,23 +17,13 @@ import tensorflow as tf from rl_coach.architectures.tensorflow_components.layers import Dense -from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters +from rl_coach.architectures.tensorflow_components.heads.head import Head from rl_coach.base_parameters import AgentParameters from rl_coach.core_types import QActionStateValue from rl_coach.spaces import BoxActionSpace from rl_coach.spaces import SpacesDefinition -class NAFHeadParameters(HeadParameters): - def __init__(self, activation_function: str ='tanh', name: str='naf_head_params', - num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0, - loss_weight: float = 1.0, dense_layer=Dense): - super().__init__(parameterized_class=NAFHead, activation_function=activation_function, name=name, - dense_layer=dense_layer, num_output_head_copies=num_output_head_copies, - rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor, - loss_weight=loss_weight) - - class NAFHead(Head): def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str, head_idx: int = 0, loss_weight: float = 1., is_local: bool = True,activation_function: 
str='relu', diff --git a/rl_coach/architectures/tensorflow_components/heads/policy_head.py b/rl_coach/architectures/tensorflow_components/heads/policy_head.py index da53a1439..99c995853 100644 --- a/rl_coach/architectures/tensorflow_components/heads/policy_head.py +++ b/rl_coach/architectures/tensorflow_components/heads/policy_head.py @@ -18,7 +18,7 @@ import tensorflow as tf from rl_coach.architectures.tensorflow_components.layers import Dense -from rl_coach.architectures.tensorflow_components.heads.head import Head, normalized_columns_initializer, HeadParameters +from rl_coach.architectures.tensorflow_components.heads.head import Head, normalized_columns_initializer from rl_coach.base_parameters import AgentParameters from rl_coach.core_types import ActionProbabilities from rl_coach.exploration_policies.continuous_entropy import ContinuousEntropyParameters @@ -27,17 +27,6 @@ from rl_coach.utils import eps, indent_string -class PolicyHeadParameters(HeadParameters): - def __init__(self, activation_function: str ='tanh', name: str='policy_head_params', - num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0, - loss_weight: float = 1.0, dense_layer=Dense): - super().__init__(parameterized_class=PolicyHead, activation_function=activation_function, name=name, - dense_layer=dense_layer, num_output_head_copies=num_output_head_copies, - rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor, - loss_weight=loss_weight) - - - class PolicyHead(Head): def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str, head_idx: int = 0, loss_weight: float = 1., is_local: bool = True, activation_function: str='tanh', diff --git a/rl_coach/architectures/tensorflow_components/heads/ppo_head.py b/rl_coach/architectures/tensorflow_components/heads/ppo_head.py index c76c03523..6ce7898c4 100644 --- a/rl_coach/architectures/tensorflow_components/heads/ppo_head.py +++ b/rl_coach/architectures/tensorflow_components/heads/ppo_head.py @@ -18,7 +18,7 @@ import tensorflow as tf from rl_coach.architectures.tensorflow_components.layers import Dense -from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters, normalized_columns_initializer +from rl_coach.architectures.tensorflow_components.heads.head import Head, normalized_columns_initializer from rl_coach.base_parameters import AgentParameters from rl_coach.core_types import ActionProbabilities from rl_coach.spaces import BoxActionSpace, DiscreteActionSpace @@ -26,16 +26,6 @@ from rl_coach.utils import eps -class PPOHeadParameters(HeadParameters): - def __init__(self, activation_function: str ='tanh', name: str='ppo_head_params', - num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0, - loss_weight: float = 1.0, dense_layer=Dense): - super().__init__(parameterized_class=PPOHead, activation_function=activation_function, name=name, - dense_layer=dense_layer, num_output_head_copies=num_output_head_copies, - rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor, - loss_weight=loss_weight) - - class PPOHead(Head): def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str, head_idx: int = 0, loss_weight: float = 1., is_local: bool = True, activation_function: str='tanh', diff --git a/rl_coach/architectures/tensorflow_components/heads/ppo_v_head.py b/rl_coach/architectures/tensorflow_components/heads/ppo_v_head.py index 7253ecca1..968a97a29 100644 --- 
diff --git a/rl_coach/architectures/tensorflow_components/heads/ppo_v_head.py b/rl_coach/architectures/tensorflow_components/heads/ppo_v_head.py
index 7253ecca1..968a97a29 100644
--- a/rl_coach/architectures/tensorflow_components/heads/ppo_v_head.py
+++ b/rl_coach/architectures/tensorflow_components/heads/ppo_v_head.py
@@ -17,23 +17,12 @@
 import tensorflow as tf
 
 from rl_coach.architectures.tensorflow_components.layers import Dense
-
-from rl_coach.architectures.tensorflow_components.heads.head import Head, normalized_columns_initializer, HeadParameters
+from rl_coach.architectures.tensorflow_components.heads.head import Head, normalized_columns_initializer
 from rl_coach.base_parameters import AgentParameters
 from rl_coach.core_types import ActionProbabilities
 from rl_coach.spaces import SpacesDefinition
 
 
-class PPOVHeadParameters(HeadParameters):
-    def __init__(self, activation_function: str ='relu', name: str='ppo_v_head_params',
-                 num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
-                 loss_weight: float = 1.0, dense_layer=Dense):
-        super().__init__(parameterized_class=PPOVHead, activation_function=activation_function, name=name,
-                         dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
-                         rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
-                         loss_weight=loss_weight)
-
-
 class PPOVHead(Head):
     def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str,
                  head_idx: int = 0, loss_weight: float = 1., is_local: bool = True, activation_function: str='relu',
diff --git a/rl_coach/architectures/tensorflow_components/heads/q_head.py b/rl_coach/architectures/tensorflow_components/heads/q_head.py
index 56a9b974a..32c69463b 100644
--- a/rl_coach/architectures/tensorflow_components/heads/q_head.py
+++ b/rl_coach/architectures/tensorflow_components/heads/q_head.py
@@ -17,23 +17,12 @@
 import tensorflow as tf
 
 from rl_coach.architectures.tensorflow_components.layers import Dense
-
-from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters
+from rl_coach.architectures.tensorflow_components.heads.head import Head
 from rl_coach.base_parameters import AgentParameters
 from rl_coach.core_types import QActionStateValue
 from rl_coach.spaces import SpacesDefinition, BoxActionSpace, DiscreteActionSpace
 
 
-class QHeadParameters(HeadParameters):
-    def __init__(self, activation_function: str ='relu', name: str='q_head_params',
-                 num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
-                 loss_weight: float = 1.0, dense_layer=Dense):
-        super().__init__(parameterized_class=QHead, activation_function=activation_function, name=name,
-                         dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
-                         rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
-                         loss_weight=loss_weight)
-
-
 class QHead(Head):
     def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str,
                  head_idx: int = 0, loss_weight: float = 1., is_local: bool = True, activation_function: str='relu',
diff --git a/rl_coach/architectures/tensorflow_components/heads/quantile_regression_q_head.py b/rl_coach/architectures/tensorflow_components/heads/quantile_regression_q_head.py
index 012bbfaef..fa6e1e931 100644
--- a/rl_coach/architectures/tensorflow_components/heads/quantile_regression_q_head.py
+++ b/rl_coach/architectures/tensorflow_components/heads/quantile_regression_q_head.py
@@ -17,23 +17,12 @@
 import tensorflow as tf
 
 from rl_coach.architectures.tensorflow_components.layers import Dense
-
-from rl_coach.architectures.tensorflow_components.heads.head import Head, HeadParameters
+from rl_coach.architectures.tensorflow_components.heads.head import Head
 from rl_coach.base_parameters import AgentParameters
 from rl_coach.core_types import QActionStateValue
 from rl_coach.spaces import SpacesDefinition
 
 
-class QuantileRegressionQHeadParameters(HeadParameters):
-    def __init__(self, activation_function: str ='relu', name: str='quantile_regression_q_head_params',
-                 num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
-                 loss_weight: float = 1.0, dense_layer=Dense):
-        super().__init__(parameterized_class=QuantileRegressionQHead, activation_function=activation_function, name=name,
-                         dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
-                         rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
-                         loss_weight=loss_weight)
-
-
 class QuantileRegressionQHead(Head):
     def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str,
                  head_idx: int = 0, loss_weight: float = 1., is_local: bool = True, activation_function: str='relu',
diff --git a/rl_coach/architectures/tensorflow_components/heads/rainbow_q_head.py b/rl_coach/architectures/tensorflow_components/heads/rainbow_q_head.py
index 6c216232a..2d2fb6ee5 100644
--- a/rl_coach/architectures/tensorflow_components/heads/rainbow_q_head.py
+++ b/rl_coach/architectures/tensorflow_components/heads/rainbow_q_head.py
@@ -17,22 +17,12 @@
 import tensorflow as tf
 
 from rl_coach.architectures.tensorflow_components.layers import Dense
-from rl_coach.architectures.tensorflow_components.heads.head import HeadParameters, Head
+from rl_coach.architectures.tensorflow_components.heads.head import Head
 from rl_coach.base_parameters import AgentParameters
 from rl_coach.core_types import QActionStateValue
 from rl_coach.spaces import SpacesDefinition
 
 
-class RainbowQHeadParameters(HeadParameters):
-    def __init__(self, activation_function: str ='relu', name: str='rainbow_q_head_params',
-                 num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
-                 loss_weight: float = 1.0, dense_layer=Dense):
-        super().__init__(parameterized_class=RainbowQHead, activation_function=activation_function, name=name,
-                         dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
-                         rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
-                         loss_weight=loss_weight)
-
-
 class RainbowQHead(Head):
     def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str,
                  head_idx: int = 0, loss_weight: float = 1., is_local: bool = True, activation_function: str='relu',
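Note: the quantile-regression and Rainbow hunks above repeat the same removal template, and the v_head hunk just below closes out the set. Downstream code now pulls all of these classes from one place; an illustrative import, assuming the new module re-exports them under their original names (the preset hunks below confirm this for DuelingQHeadParameters and RegressionHeadParameters):

    # Illustrative only: the head parameter classes removed in this patch,
    # imported from the single framework-agnostic module.
    from rl_coach.architectures.head_parameters import (
        MeasurementsPredictionHeadParameters,
        NAFHeadParameters,
        PolicyHeadParameters,
        PPOHeadParameters,
        PPOVHeadParameters,
        QHeadParameters,
        QuantileRegressionQHeadParameters,
        RainbowQHeadParameters,
    )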
diff --git a/rl_coach/architectures/tensorflow_components/heads/v_head.py b/rl_coach/architectures/tensorflow_components/heads/v_head.py
index 6b2b67a24..07dbf2538 100644
--- a/rl_coach/architectures/tensorflow_components/heads/v_head.py
+++ b/rl_coach/architectures/tensorflow_components/heads/v_head.py
@@ -17,23 +17,12 @@
 import tensorflow as tf
 
 from rl_coach.architectures.tensorflow_components.layers import Dense
-
-from rl_coach.architectures.tensorflow_components.heads.head import Head, normalized_columns_initializer, HeadParameters
+from rl_coach.architectures.tensorflow_components.heads.head import Head, normalized_columns_initializer
 from rl_coach.base_parameters import AgentParameters
 from rl_coach.core_types import VStateValue
 from rl_coach.spaces import SpacesDefinition
 
 
-class VHeadParameters(HeadParameters):
-    def __init__(self, activation_function: str ='relu', name: str='v_head_params',
-                 num_output_head_copies: int = 1, rescale_gradient_from_head_by_factor: float = 1.0,
-                 loss_weight: float = 1.0, dense_layer=Dense):
-        super().__init__(parameterized_class=VHead, activation_function=activation_function, name=name,
-                         dense_layer=dense_layer, num_output_head_copies=num_output_head_copies,
-                         rescale_gradient_from_head_by_factor=rescale_gradient_from_head_by_factor,
-                         loss_weight=loss_weight)
-
-
 class VHead(Head):
     def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str,
                  head_idx: int = 0, loss_weight: float = 1., is_local: bool = True, activation_function: str='relu',
diff --git a/rl_coach/architectures/tensorflow_components/middlewares/__init__.py b/rl_coach/architectures/tensorflow_components/middlewares/__init__.py
index e69de29bb..481eab0bf 100644
--- a/rl_coach/architectures/tensorflow_components/middlewares/__init__.py
+++ b/rl_coach/architectures/tensorflow_components/middlewares/__init__.py
@@ -0,0 +1,4 @@
+from .fc_middleware import FCMiddleware
+from .lstm_middleware import LSTMMiddleware
+
+__all__ = ["FCMiddleware", "LSTMMiddleware"]
diff --git a/rl_coach/architectures/tensorflow_components/middlewares/fc_middleware.py b/rl_coach/architectures/tensorflow_components/middlewares/fc_middleware.py
index 5d3cb0c94..f85db8239 100644
--- a/rl_coach/architectures/tensorflow_components/middlewares/fc_middleware.py
+++ b/rl_coach/architectures/tensorflow_components/middlewares/fc_middleware.py
@@ -18,22 +18,12 @@
 import tensorflow as tf
 
 from rl_coach.architectures.tensorflow_components.layers import batchnorm_activation_dropout, Dense
-from rl_coach.architectures.tensorflow_components.middlewares.middleware import Middleware, MiddlewareParameters
+from rl_coach.architectures.tensorflow_components.middlewares.middleware import Middleware
 from rl_coach.base_parameters import MiddlewareScheme
 from rl_coach.core_types import Middleware_FC_Embedding
 from rl_coach.utils import force_list
 
 
-class FCMiddlewareParameters(MiddlewareParameters):
-    def __init__(self, activation_function='relu',
-                 scheme: Union[List, MiddlewareScheme] = MiddlewareScheme.Medium,
-                 batchnorm: bool = False, dropout: bool = False,
-                 name="middleware_fc_embedder", dense_layer=Dense, is_training=False):
-        super().__init__(parameterized_class=FCMiddleware, activation_function=activation_function,
-                         scheme=scheme, batchnorm=batchnorm, dropout=dropout, name=name, dense_layer=dense_layer,
-                         is_training=is_training)
-
-
 class FCMiddleware(Middleware):
     def __init__(self, activation_function=tf.nn.relu,
                  scheme: MiddlewareScheme = MiddlewareScheme.Medium,
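Note: the new middlewares/__init__.py above gives the package an explicit public surface, so callers can import the concrete middleware classes without naming their defining modules:

    # Enabled by the __init__.py re-exports added above.
    from rl_coach.architectures.tensorflow_components.middlewares import FCMiddleware, LSTMMiddleware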
diff --git a/rl_coach/architectures/tensorflow_components/middlewares/lstm_middleware.py b/rl_coach/architectures/tensorflow_components/middlewares/lstm_middleware.py
index 19555542c..7c4a1b08e 100644
--- a/rl_coach/architectures/tensorflow_components/middlewares/lstm_middleware.py
+++ b/rl_coach/architectures/tensorflow_components/middlewares/lstm_middleware.py
@@ -19,23 +19,12 @@
 import tensorflow as tf
 
 from rl_coach.architectures.tensorflow_components.layers import batchnorm_activation_dropout, Dense
-from rl_coach.architectures.tensorflow_components.middlewares.middleware import Middleware, MiddlewareParameters
+from rl_coach.architectures.tensorflow_components.middlewares.middleware import Middleware
 from rl_coach.base_parameters import MiddlewareScheme
 from rl_coach.core_types import Middleware_LSTM_Embedding
 from rl_coach.utils import force_list
 
 
-class LSTMMiddlewareParameters(MiddlewareParameters):
-    def __init__(self, activation_function='relu', number_of_lstm_cells=256,
-                 scheme: MiddlewareScheme = MiddlewareScheme.Medium,
-                 batchnorm: bool = False, dropout: bool = False,
-                 name="middleware_lstm_embedder", dense_layer=Dense, is_training=False):
-        super().__init__(parameterized_class=LSTMMiddleware, activation_function=activation_function,
-                         scheme=scheme, batchnorm=batchnorm, dropout=dropout, name=name, dense_layer=dense_layer,
-                         is_training=is_training)
-        self.number_of_lstm_cells = number_of_lstm_cells
-
-
 class LSTMMiddleware(Middleware):
     def __init__(self, activation_function=tf.nn.relu, number_of_lstm_cells: int=256,
                  scheme: MiddlewareScheme = MiddlewareScheme.Medium,
diff --git a/rl_coach/architectures/tensorflow_components/middlewares/middleware.py b/rl_coach/architectures/tensorflow_components/middlewares/middleware.py
index 6011ef339..02376dec1 100644
--- a/rl_coach/architectures/tensorflow_components/middlewares/middleware.py
+++ b/rl_coach/architectures/tensorflow_components/middlewares/middleware.py
@@ -14,7 +14,6 @@
 # limitations under the License.
 #
 import copy
-from typing import Type, Union, List
 
 import tensorflow as tf
 
@@ -23,20 +22,6 @@
 from rl_coach.core_types import MiddlewareEmbedding
 
 
-class MiddlewareParameters(NetworkComponentParameters):
-    def __init__(self, parameterized_class: Type['Middleware'],
-                 activation_function: str='relu', scheme: Union[List, MiddlewareScheme]=MiddlewareScheme.Medium,
-                 batchnorm: bool=False, dropout: bool=False, name='middleware', dense_layer=Dense, is_training=False):
-        super().__init__(dense_layer=dense_layer)
-        self.activation_function = activation_function
-        self.scheme = scheme
-        self.batchnorm = batchnorm
-        self.dropout = dropout
-        self.name = name
-        self.is_training = is_training
-        self.parameterized_class_name = parameterized_class.__name__
-
-
 class Middleware(object):
     """
     A middleware embedder is the middle part of the network. It takes the embeddings from the input embedders,
@@ -57,6 +42,8 @@ def __init__(self, activation_function=tf.nn.relu,
         self.scheme = scheme
         self.return_type = MiddlewareEmbedding
         self.dense_layer = dense_layer
+        if self.dense_layer is None:
+            self.dense_layer = Dense
         self.is_training = is_training
 
         # layers order is conv -> batchnorm -> activation -> dropout
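Note: the removed MiddlewareParameters already stored only the middleware's class name (parameterized_class.__name__), so the relocated version needs no reference to the TensorFlow classes at all. The file rl_coach/architectures/middleware_parameters.py is not shown in this excerpt; the sketch below assumes it keeps the removed class's fields but takes the name as a plain string and defaults dense_layer to None:

    # Sketch only; the real module is rl_coach/architectures/middleware_parameters.py,
    # whose exact contents are not part of this excerpt.
    from typing import List, Union

    from rl_coach.base_parameters import MiddlewareScheme, NetworkComponentParameters


    class MiddlewareParameters(NetworkComponentParameters):
        def __init__(self, parameterized_class_name: str,
                     activation_function: str = 'relu',
                     scheme: Union[List, MiddlewareScheme] = MiddlewareScheme.Medium,
                     batchnorm: bool = False, dropout: bool = False,
                     name: str = 'middleware', dense_layer=None, is_training: bool = False):
            super().__init__(dense_layer=dense_layer)
            self.activation_function = activation_function
            self.scheme = scheme
            self.batchnorm = batchnorm
            self.dropout = dropout
            self.name = name
            self.is_training = is_training
            # Store the target class by name so this module never has to
            # import the TensorFlow Middleware subclasses.
            self.parameterized_class_name = parameterized_class_name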
diff --git a/rl_coach/presets/Atari_A3C.py b/rl_coach/presets/Atari_A3C.py
index defcaafaf..cda5659d6 100644
--- a/rl_coach/presets/Atari_A3C.py
+++ b/rl_coach/presets/Atari_A3C.py
@@ -1,5 +1,5 @@
 from rl_coach.agents.actor_critic_agent import ActorCriticAgentParameters
-from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters
+from rl_coach.architectures.middleware_parameters import FCMiddlewareParameters
 from rl_coach.base_parameters import VisualizationParameters, PresetValidationParameters
 from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps
 from rl_coach.environments.environment import SingleLevelSelection
diff --git a/rl_coach/presets/Atari_A3C_LSTM.py b/rl_coach/presets/Atari_A3C_LSTM.py
index a5ff22311..d2edc566b 100644
--- a/rl_coach/presets/Atari_A3C_LSTM.py
+++ b/rl_coach/presets/Atari_A3C_LSTM.py
@@ -1,5 +1,5 @@
 from rl_coach.agents.actor_critic_agent import ActorCriticAgentParameters
-from rl_coach.architectures.tensorflow_components.middlewares.lstm_middleware import LSTMMiddlewareParameters
+from rl_coach.architectures.middleware_parameters import LSTMMiddlewareParameters
 from rl_coach.base_parameters import VisualizationParameters, MiddlewareScheme, PresetValidationParameters
 from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps
 from rl_coach.environments.environment import SingleLevelSelection
diff --git a/rl_coach/presets/Atari_Dueling_DDQN.py b/rl_coach/presets/Atari_Dueling_DDQN.py
index 39d87e236..a163b515c 100644
--- a/rl_coach/presets/Atari_Dueling_DDQN.py
+++ b/rl_coach/presets/Atari_Dueling_DDQN.py
@@ -1,7 +1,7 @@
 import math
 
 from rl_coach.agents.ddqn_agent import DDQNAgentParameters
-from rl_coach.architectures.tensorflow_components.heads.dueling_q_head import DuelingQHeadParameters
+from rl_coach.architectures.head_parameters import DuelingQHeadParameters
 from rl_coach.base_parameters import VisualizationParameters, MiddlewareScheme, PresetValidationParameters
 from rl_coach.environments.environment import SingleLevelSelection
 from rl_coach.environments.gym_environment import Atari, atari_deterministic_v4, atari_schedule
diff --git a/rl_coach/presets/Atari_Dueling_DDQN_with_PER_OpenAI.py b/rl_coach/presets/Atari_Dueling_DDQN_with_PER_OpenAI.py
index 73f5ff293..694590c87 100644
--- a/rl_coach/presets/Atari_Dueling_DDQN_with_PER_OpenAI.py
+++ b/rl_coach/presets/Atari_Dueling_DDQN_with_PER_OpenAI.py
@@ -1,5 +1,5 @@
 from rl_coach.agents.ddqn_agent import DDQNAgentParameters
-from rl_coach.architectures.tensorflow_components.heads.dueling_q_head import DuelingQHeadParameters
+from rl_coach.architectures.head_parameters import DuelingQHeadParameters
 from rl_coach.base_parameters import VisualizationParameters, MiddlewareScheme, PresetValidationParameters
 from rl_coach.core_types import EnvironmentSteps
 from rl_coach.environments.environment import SingleLevelSelection
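Note: every preset hunk from here on is the same mechanical import move. For orientation, this is roughly how the relocated classes are used at a preset call site; a sketch, not a verbatim excerpt from any preset in this patch:

    # Sketch of a typical call site after this patch (not copied from a preset).
    from rl_coach.agents.ddqn_agent import DDQNAgentParameters
    from rl_coach.architectures.head_parameters import DuelingQHeadParameters
    from rl_coach.architectures.middleware_parameters import FCMiddlewareParameters

    agent_params = DDQNAgentParameters()
    # The network does not care that these parameter objects are now defined
    # outside the tensorflow_components package.
    agent_params.network_wrappers['main'].middleware_parameters = FCMiddlewareParameters()
    agent_params.network_wrappers['main'].heads_parameters = [DuelingQHeadParameters()]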
diff --git a/rl_coach/presets/BitFlip_DQN.py b/rl_coach/presets/BitFlip_DQN.py
index 621e87559..ed849f397 100644
--- a/rl_coach/presets/BitFlip_DQN.py
+++ b/rl_coach/presets/BitFlip_DQN.py
@@ -1,5 +1,5 @@
 from rl_coach.agents.dqn_agent import DQNAgentParameters
-from rl_coach.architectures.tensorflow_components.embedders.embedder import InputEmbedderParameters
+from rl_coach.architectures.embedder_parameters import InputEmbedderParameters
 from rl_coach.architectures.tensorflow_components.layers import Dense
 from rl_coach.base_parameters import VisualizationParameters, EmbedderScheme, \
     PresetValidationParameters
diff --git a/rl_coach/presets/BitFlip_DQN_HER.py b/rl_coach/presets/BitFlip_DQN_HER.py
index 6bd11422c..3d9c2f951 100644
--- a/rl_coach/presets/BitFlip_DQN_HER.py
+++ b/rl_coach/presets/BitFlip_DQN_HER.py
@@ -1,5 +1,5 @@
 from rl_coach.agents.dqn_agent import DQNAgentParameters
-from rl_coach.architectures.tensorflow_components.embedders.embedder import InputEmbedderParameters
+from rl_coach.architectures.embedder_parameters import InputEmbedderParameters
 from rl_coach.architectures.tensorflow_components.layers import Dense
 from rl_coach.base_parameters import VisualizationParameters, EmbedderScheme, \
     PresetValidationParameters
diff --git a/rl_coach/presets/CARLA_CIL.py b/rl_coach/presets/CARLA_CIL.py
index 3e04b4fe7..8477cdf40 100644
--- a/rl_coach/presets/CARLA_CIL.py
+++ b/rl_coach/presets/CARLA_CIL.py
@@ -7,10 +7,10 @@
 from rl_coach.logger import screen
 
 from rl_coach.agents.cil_agent import CILAgentParameters
-from rl_coach.architectures.tensorflow_components.embedders.embedder import InputEmbedderParameters
-from rl_coach.architectures.tensorflow_components.heads.cil_head import RegressionHeadParameters
+from rl_coach.architectures.embedder_parameters import InputEmbedderParameters
+from rl_coach.architectures.head_parameters import RegressionHeadParameters
+from rl_coach.architectures.middleware_parameters import FCMiddlewareParameters
 from rl_coach.architectures.tensorflow_components.layers import Conv2d, Dense, BatchnormActivationDropout
-from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters
 from rl_coach.base_parameters import VisualizationParameters
 from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps
 from rl_coach.environments.carla_environment import CarlaEnvironmentParameters
diff --git a/rl_coach/presets/CARLA_Dueling_DDQN.py b/rl_coach/presets/CARLA_Dueling_DDQN.py
index 8bad8b6b1..561ed0570 100644
--- a/rl_coach/presets/CARLA_Dueling_DDQN.py
+++ b/rl_coach/presets/CARLA_Dueling_DDQN.py
@@ -1,7 +1,7 @@
 import math
 
 from rl_coach.agents.ddqn_agent import DDQNAgentParameters
-from rl_coach.architectures.tensorflow_components.heads.dueling_q_head import DuelingQHeadParameters
+from rl_coach.architectures.head_parameters import DuelingQHeadParameters
 from rl_coach.base_parameters import VisualizationParameters, MiddlewareScheme
 from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps
 from rl_coach.environments.carla_environment import CarlaEnvironmentParameters
diff --git a/rl_coach/presets/CartPole_Dueling_DDQN.py b/rl_coach/presets/CartPole_Dueling_DDQN.py
index 7463df4b4..861fdc534 100644
--- a/rl_coach/presets/CartPole_Dueling_DDQN.py
+++ b/rl_coach/presets/CartPole_Dueling_DDQN.py
@@ -1,7 +1,7 @@
 import math
 
 from rl_coach.agents.ddqn_agent import DDQNAgentParameters
-from rl_coach.architectures.tensorflow_components.heads.dueling_q_head import DuelingQHeadParameters
+from rl_coach.architectures.head_parameters import DuelingQHeadParameters
 from rl_coach.base_parameters import VisualizationParameters, PresetValidationParameters
 from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps
 from rl_coach.environments.gym_environment import GymVectorEnvironment
diff --git a/rl_coach/presets/Doom_Basic_Dueling_DDQN.py b/rl_coach/presets/Doom_Basic_Dueling_DDQN.py
index 4fc32659a..a0ed0b3d9 100644
--- a/rl_coach/presets/Doom_Basic_Dueling_DDQN.py
+++ b/rl_coach/presets/Doom_Basic_Dueling_DDQN.py
@@ -1,5 +1,5 @@
 from rl_coach.agents.ddqn_agent import DDQNAgentParameters
-from rl_coach.architectures.tensorflow_components.heads.dueling_q_head import DuelingQHeadParameters
+from rl_coach.architectures.head_parameters import DuelingQHeadParameters
 from rl_coach.base_parameters import VisualizationParameters
 from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps
 from rl_coach.environments.doom_environment import DoomEnvironmentParameters
diff --git a/rl_coach/presets/ExplorationChain_Dueling_DDQN.py b/rl_coach/presets/ExplorationChain_Dueling_DDQN.py
index f6575b3b0..b55e816a0 100644
--- a/rl_coach/presets/ExplorationChain_Dueling_DDQN.py
+++ b/rl_coach/presets/ExplorationChain_Dueling_DDQN.py
@@ -1,5 +1,5 @@
 from rl_coach.agents.ddqn_agent import DDQNAgentParameters
-from rl_coach.architectures.tensorflow_components.heads.dueling_q_head import DuelingQHeadParameters
+from rl_coach.architectures.head_parameters import DuelingQHeadParameters
 from rl_coach.base_parameters import VisualizationParameters
 from rl_coach.core_types import EnvironmentEpisodes, EnvironmentSteps
 from rl_coach.environments.gym_environment import GymEnvironmentParameters
diff --git a/rl_coach/presets/Fetch_DDPG_HER_baselines.py b/rl_coach/presets/Fetch_DDPG_HER_baselines.py
index 1119fca4d..d3fa643a8 100644
--- a/rl_coach/presets/Fetch_DDPG_HER_baselines.py
+++ b/rl_coach/presets/Fetch_DDPG_HER_baselines.py
@@ -1,7 +1,7 @@
 from rl_coach.agents.ddpg_agent import DDPGAgentParameters
-from rl_coach.architectures.tensorflow_components.embedders.embedder import InputEmbedderParameters
+from rl_coach.architectures.embedder_parameters import InputEmbedderParameters
+from rl_coach.architectures.middleware_parameters import FCMiddlewareParameters
 from rl_coach.architectures.tensorflow_components.layers import Dense
-from rl_coach.architectures.tensorflow_components.middlewares.fc_middleware import FCMiddlewareParameters
 from rl_coach.base_parameters import VisualizationParameters, EmbedderScheme, PresetValidationParameters
 from rl_coach.core_types import EnvironmentEpisodes, EnvironmentSteps, TrainingSteps
 from rl_coach.environments.environment import SingleLevelSelection
diff --git a/rl_coach/presets/Mujoco_A3C_LSTM.py b/rl_coach/presets/Mujoco_A3C_LSTM.py
index 729f1ced0..1027c010d 100644
--- a/rl_coach/presets/Mujoco_A3C_LSTM.py
+++ b/rl_coach/presets/Mujoco_A3C_LSTM.py
@@ -1,7 +1,7 @@
 from rl_coach.agents.actor_critic_agent import ActorCriticAgentParameters
-from rl_coach.architectures.tensorflow_components.embedders.embedder import InputEmbedderParameters
+from rl_coach.architectures.embedder_parameters import InputEmbedderParameters
+from rl_coach.architectures.middleware_parameters import LSTMMiddlewareParameters
 from rl_coach.architectures.tensorflow_components.layers import Dense
-from rl_coach.architectures.tensorflow_components.middlewares.lstm_middleware import LSTMMiddlewareParameters
 from rl_coach.base_parameters import VisualizationParameters, MiddlewareScheme, PresetValidationParameters
 from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps
 from rl_coach.environments.environment import SingleLevelSelection
diff --git a/rl_coach/presets/Pendulum_HAC.py b/rl_coach/presets/Pendulum_HAC.py
index 8b0826da1..b6fa02c9b 100644
--- a/rl_coach/presets/Pendulum_HAC.py
+++ b/rl_coach/presets/Pendulum_HAC.py
@@ -1,7 +1,7 @@
 import numpy as np
 
 from rl_coach.agents.hac_ddpg_agent import HACDDPGAgentParameters
-from rl_coach.architectures.tensorflow_components.embedders.embedder import InputEmbedderParameters
+from rl_coach.architectures.embedder_parameters import InputEmbedderParameters
 from rl_coach.architectures.tensorflow_components.layers import Dense
 from rl_coach.base_parameters import VisualizationParameters, EmbeddingMergerType, EmbedderScheme
 from rl_coach.core_types import EnvironmentEpisodes, EnvironmentSteps, TrainingSteps
diff --git a/rl_coach/presets/Starcraft_CollectMinerals_A3C.py b/rl_coach/presets/Starcraft_CollectMinerals_A3C.py
index f3cef5454..2559333ae 100644
--- a/rl_coach/presets/Starcraft_CollectMinerals_A3C.py
+++ b/rl_coach/presets/Starcraft_CollectMinerals_A3C.py
@@ -1,6 +1,6 @@
 from rl_coach.agents.actor_critic_agent import ActorCriticAgentParameters
 from rl_coach.agents.policy_optimization_agent import PolicyGradientRescaler
-from rl_coach.architectures.tensorflow_components.embedders.embedder import InputEmbedderParameters
+from rl_coach.architectures.embedder_parameters import InputEmbedderParameters
 from rl_coach.base_parameters import VisualizationParameters
 from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps
 from rl_coach.environments.starcraft2_environment import StarCraft2EnvironmentParameters
diff --git a/rl_coach/presets/Starcraft_CollectMinerals_Dueling_DDQN.py b/rl_coach/presets/Starcraft_CollectMinerals_Dueling_DDQN.py
index cb1c6a85f..155bfce14 100644
--- a/rl_coach/presets/Starcraft_CollectMinerals_Dueling_DDQN.py
+++ b/rl_coach/presets/Starcraft_CollectMinerals_Dueling_DDQN.py
@@ -1,8 +1,8 @@
 from collections import OrderedDict
 
 from rl_coach.agents.ddqn_agent import DDQNAgentParameters
-from rl_coach.architectures.tensorflow_components.embedders.embedder import InputEmbedderParameters
-from rl_coach.architectures.tensorflow_components.heads.dueling_q_head import DuelingQHeadParameters
+from rl_coach.architectures.embedder_parameters import InputEmbedderParameters
+from rl_coach.architectures.head_parameters import DuelingQHeadParameters
 from rl_coach.base_parameters import VisualizationParameters
 from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps
 from rl_coach.environments.starcraft2_environment import StarCraft2EnvironmentParameters
diff --git a/tutorials/0. Quick Start Guide.ipynb b/tutorials/0. Quick Start Guide.ipynb
index 6e29812f9..963ec7491 100644
--- a/tutorials/0. Quick Start Guide.ipynb
+++ b/tutorials/0. Quick Start Guide.ipynb
@@ -134,7 +134,7 @@
 "from rl_coach.environments.gym_environment import GymVectorEnvironment\n",
 "from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager\n",
 "from rl_coach.graph_managers.graph_manager import SimpleSchedule\n",
-"from rl_coach.architectures.tensorflow_components.embedders.embedder import InputEmbedderParameters\n",
+"from rl_coach.architectures.embedder_parameters import InputEmbedderParameters\n",
 "\n",
 "# define the environment parameters\n",
 "bit_length = 10\n",
diff --git a/tutorials/3. Implementing a Hierarchical RL Graph.ipynb b/tutorials/3. Implementing a Hierarchical RL Graph.ipynb
index eafa78fac..f200c2c78 100644
--- a/tutorials/3. Implementing a Hierarchical RL Graph.ipynb
+++ b/tutorials/3. Implementing a Hierarchical RL Graph.ipynb
@@ -162,9 +162,9 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from rl_coach.architectures.tensorflow_components.architecture import Dense\n",
+"from rl_coach.architectures.tensorflow_components.layers import Dense\n",
 "from rl_coach.base_parameters import VisualizationParameters, EmbeddingMergerType, EmbedderScheme\n",
-"from rl_coach.architectures.tensorflow_components.embedders.embedder import InputEmbedderParameters\n",
+"from rl_coach.architectures.embedder_parameters import InputEmbedderParameters\n",
 "from rl_coach.memories.episodic.episodic_hindsight_experience_replay import HindsightGoalSelectionMethod, \\\n",
 "    EpisodicHindsightExperienceReplayParameters\n",
 "from rl_coach.memories.episodic.episodic_hrl_hindsight_experience_replay import \\\n",
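Note: this notebook cell gets two fixes at once: the Dense import had been pointing at the old architecture module and now comes from layers, and InputEmbedderParameters moves to the new framework-agnostic location. De-escaped from the notebook JSON above, the corrected imports read:

    from rl_coach.architectures.tensorflow_components.layers import Dense
    from rl_coach.architectures.embedder_parameters import InputEmbedderParameters
    from rl_coach.base_parameters import VisualizationParameters, EmbeddingMergerType, EmbedderScheme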