Hi,
Thanks for reading this post.
Currently, I am trying to create my own network for reinforcement learning. To this end, I have adapted the Q-network from
Playing Atari with Deep Reinforcement Learning. Volodymyr Mnih, Koray Kavukcuoglu, David Silver, Alex Graves, Ioannis Antonoglou, Daan Wierstra, Martin Riedmiller.
and
Mnih, Volodymyr, et al. "Human-level control through deep reinforcement learning." Nature 518.7540 (2015): 529-533.
When Theano tries to compile the functions for the loss and the Q-values,

self._train = theano.function([], [loss, q_vals], updates=updates,
                              givens=givens_train)
self._q_vals = theano.function([], q_vals,
                               givens=givens_q_val)

it keeps returning:
UnusedInputError: theano.function was asked to create a function computing outputs given certain inputs, but the provided input variable at index 0 is not part of the computational graph needed to compute the outputs: <CudaNdarrayType(float32, 4D)>.
To make this error into a warning, you can pass the parameter on_unused_input='warn' to theano.function. To disable it completely, use on_unused_input='ignore'.
I have debugged the code many times, but I cannot understand why the inputs (provided via givens) are not treated as part of the function's computation.
Many thanks in advance for your explanation.
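To illustrate how I understand theano.function and givens (this is only a minimal sketch with made-up names, not my actual code): a variable used as a key in givens apparently has to appear in the graph of the compiled outputs, otherwise the function complains about an unused input.

import numpy as np
import theano
import theano.tensor as T

x = T.matrix('x')   # symbolic input that the output depends on
y = T.matrix('y')   # symbolic input that the output does NOT depend on
out = T.sum(x ** 2)

x_shared = theano.shared(np.zeros((4, 4), dtype=theano.config.floatX))
y_shared = theano.shared(np.zeros((4, 4), dtype=theano.config.floatX))

# Compiles fine: the givens key `x` is part of the graph of `out`.
f_ok = theano.function([], out, givens={x: x_shared})

# As far as I can tell, this is the situation I am in: `y` never appears in the
# graph of `out`, so I would expect the same UnusedInputError here.
# f_bad = theano.function([], out, givens={x: x_shared, y: y_shared})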
Here is my full source code for the network:
"""
import lasagne
import numpy as np
import theano
import theano.tensor as T
from updates import deepmind_rmsprop
import logging
q_vals = lasagne.layers.get_output(self.l_out, states / input_scale)
# massage/unpack states into the right form for the multi-input network
q_vals = lasagne.layers.get_output(self.l_out, {'l_in': imgs,
                                                'l_loc1': locs, 'l_his': hiss,
                                                'l_dis': sds})
if self.freeze_interval > 0:
    next_q_vals = lasagne.layers.get_output(self.next_l_out,
                                            {'l_in': next_imgs,
                                             'l_loc1': next_locs, 'l_his': next_hiss,
                                             'l_dis': next_sds})
else:
    next_q_vals = lasagne.layers.get_output(self.l_out,
                                            {'l_in': next_imgs,
                                             'l_loc1': next_locs, 'l_his': next_hiss,
                                             'l_dis': next_sds})
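# disconnected_grad returns its argument unchanged but treats it as a constant
# during differentiation, so no gradient flows back through next_q_vals.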
next_q_vals = theano.gradient.disconnected_grad(next_q_vals)
target = (rewards +
          (T.ones_like(terminals) - terminals) *
          self.discount * T.max(next_q_vals, axis=1, keepdims=True))
diff = target - q_vals[T.arange(batch_size),
                       actions.reshape((-1,))].reshape((-1, 1))
if self.clip_delta > 0:
    # If we simply take the squared clipped diff as our loss,
    # then the gradient will be zero whenever the diff exceeds
    # the clip bounds. To avoid this, we extend the loss
    # linearly past the clip point to keep the gradient constant
    # in that regime.
    #
    # This is equivalent to declaring d loss/d q_vals to be
    # equal to the clipped diff, then backpropagating from
    # there, which is what the DeepMind implementation does.
    quadratic_part = T.minimum(abs(diff), self.clip_delta)
    linear_part = abs(diff) - quadratic_part
    loss = 0.5 * quadratic_part ** 2 + self.clip_delta * linear_part
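    # Written out piecewise, this is the Huber loss:
    #     0.5 * diff**2                             if |diff| <= clip_delta
    #     clip_delta * (|diff| - 0.5 * clip_delta)  if |diff| >  clip_delta
    # so the magnitude of d loss / d diff never exceeds clip_delta.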
else:
    loss = 0.5 * diff ** 2
if batch_accumulator == 'sum':
    loss = T.sum(loss)
elif batch_accumulator == 'mean':
    loss = T.mean(loss)
else:
    raise ValueError("Bad accumulator: {}".format(batch_accumulator))
params = lasagne.layers.helper.get_all_params(self.l_out)
givens_train = {
class DeepQLearner:
    """
    Deep Q-learning network using Lasagne.
    """
    def __init__(self, width_img,
                 height_img,
                 width_loc,
                 height_loc,
                 width_his,
                 height_his,
                 target_dis_size,
                 num_actions, num_frames, discount, learning_rate, rho,
                 rms_epsilon, momentum, clip_delta, freeze_interval,
                 batch_size, network_type, update_rule,
                 batch_accumulator, rng, input_scale=8.0):
        target_distribution = T.tensor('target_distribution')
        next_target_distribution = T.tensor('next_target_distribution')
        self.states_shared = theano.shared(
            np.zeros((batch_size, num_frames, input_height, input_width),
                     dtype=theano.config.floatX))
        self.next_states_shared = theano.shared(
            np.zeros((batch_size, num_frames, input_height, input_width),
                     dtype=theano.config.floatX))
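        # These shared variables are pre-allocated device-side buffers: each
        # minibatch is copied into them with set_value() (see further below)
        # and they are substituted for the symbolic inputs through `givens`.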
        q_vals = lasagne.layers.get_output(self.l_out, states / input_scale)
        next_q_vals = lasagne.layers.get_output(self.next_l_out,
                                                next_states / input_scale)
            states: self.states_shared,
            next_states: self.next_states_shared,
            states: self.states_shared,
            next_states: self.next_states_shared,
            rewards: self.rewards_shared,
            actions: self.actions_shared,
            terminals: self.terminals_shared
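            # As far as I understand it, every key in these givens has to be a
            # symbolic variable that actually appears in the graph of the
            # compiled outputs (loss / q_vals); the UnusedInputError above
            # seems to complain about exactly that condition.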
        self.states_shared.set_value(states)
        self.next_states_shared.set_value(next_states)
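        # (the two set_value() calls above copy the current minibatch into the
        #  shared buffers that `givens` substitutes into the compiled function)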
        states = np.zeros((self.batch_size, self.num_frames, self.input_height,
                           self.input_width), dtype=theano.config.floatX)
        states[0, ...] = state
        self.states_shared.set_value(states)
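        # (a single state is written into row 0 of a batch-sized buffer so
        #  that the compiled self._q_vals() can be evaluated on it)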
def main():
    net = DeepQLearner(84, 84, 16, 4, .99, .00025, .95, .95, 10000,
                       #32, 'nature_cuda')

if __name__ == '__main__':
    main()