UnusedInputError #58

Open
uniwf2016 opened this issue May 17, 2016 · 0 comments
Hi,

Thanks for reading this post.

Currently, I am trying to create my own network for reinforcement learning. To this end, I have adapted the Q-network from
Mnih, Volodymyr, et al. "Playing Atari with Deep Reinforcement Learning." arXiv preprint arXiv:1312.5602 (2013).
and
Mnih, Volodymyr, et al. "Human-level control through deep reinforcement learning." Nature 518.7540 (2015): 529-533.

When Theano tries to compile the functions for loss and q_vals,

self._train = theano.function([], [loss, q_vals], updates=updates,
givens=givens_train)
self._q_vals = theano.function([], q_vals,
givens=givens_q_val)

it keeps returning
UnusedInputError: theano.function was asked to create a function computing outputs given certain inputs, but the provided input variable at index 0 is not part of the computational graph needed to compute the outputs: <CudaNdarrayType(float32, 4D)>.
To make this error into a warning, you can pass the parameter on_unused_input='warn' to theano.function. To disable it completely, use on_unused_input='ignore'.

I have debugged the code many times, but I cannot understand why the inputs (from the givens) are not used as part of the computation.
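
To illustrate what I understand the error to mean, here is a minimal, self-contained sketch (plain Theano, made-up variable names) that seems to reproduce the same class of failure: a variable that appears as a key in givens but not in the graph of the requested outputs.

    import numpy as np
    import theano
    import theano.tensor as T

    x = T.vector('x')  # used by the output
    y = T.vector('y')  # never used by the output
    out = (x ** 2).sum()

    x_shared = theano.shared(np.zeros(4, dtype=theano.config.floatX))
    y_shared = theano.shared(np.zeros(4, dtype=theano.config.floatX))

    # y is substituted via givens but does not feed `out`, so compiling
    # raises UnusedInputError unless on_unused_input='warn'/'ignore' is set.
    f = theano.function([], out, givens={x: x_shared, y: y_shared})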

Many thanks in advance for your explanation.

Here is my full source code for the network:

"""
import lasagne
import numpy as np
import theano
import theano.tensor as T
from updates import deepmind_rmsprop
import logging

class DeepQLearner:
"""
Deep Q-learning network using Lasagne.
"""
def __init__(self, width_img,
             height_img,
             width_loc,
             height_loc,
             width_his,
             height_his,
             target_dis_size,
             num_actions, num_frames, discount, learning_rate, rho,
             rms_epsilon, momentum, clip_delta, freeze_interval,
             batch_size, network_type, update_rule,
             batch_accumulator, rng, input_scale=8.0):

    self.width_img = width_img
    self.height_img = height_img
    self.width_loc = width_loc
    self.height_loc = height_loc
    self.width_his = width_his
    self.height_his = height_his
    self.target_dis_size = target_dis_size

    self.num_actions = num_actions
    self.num_frames = num_frames
    self.batch_size = batch_size
    self.discount = discount
    self.rho = rho
    self.lr = learning_rate
    self.rms_epsilon = rms_epsilon
    self.momentum = momentum
    self.clip_delta = clip_delta
    self.freeze_interval = freeze_interval
    self.rng = rng

    self.logger = logging.getLogger(__name__)
    if not getattr(self.logger, 'handler_set', None):

        self.logger.setLevel(logging.DEBUG)
        # create a file handler

        handler = logging.FileHandler('toy.log', mode='a')
        handler.setLevel(logging.DEBUG)

        # create a logging format

        formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        handler.setFormatter(formatter)

        # add the handlers to the logger

        self.logger.addHandler(handler)
        self.logger.handler_set = True
    self.logger.info('initialise a Q network.')

    lasagne.random.set_rng(self.rng)

    self.update_counter = 0

    self.l_out = self.build_network(network_type, num_actions, num_frames, batch_size)
    if self.freeze_interval > 0:
        self.next_l_out = self.build_network(network_type, num_actions,
                                             num_frames, batch_size)
        self.reset_q_hat()

    #states = T.tensor4('states')
    #next_states = T.tensor4('next_states')
    imgs = T.tensor4('imgs')
    next_imgs = T.tensor4('next_imgs')
    locs = T.tensor4('locs')
    next_locs = T.tensor4('next_locs')
    hiss = T.tensor4('hiss')
    next_hiss = T.tensor4('next_hiss')

    # target_distribution = T.tensor('target_distribution')            # unused
    # next_target_distribution = T.tensor('next_target_distribution')  # unused

    sds = T.icol('sds')
    next_sds = T.icol('next_sds')

    rewards = T.col('rewards')
    actions = T.icol('actions')
    terminals = T.icol('terminals')

    # Leftover from the original single-input Atari network
    # (input_height / input_width are not defined here):
    # self.states_shared = theano.shared(
    #     np.zeros((batch_size, num_frames, input_height, input_width),
    #              dtype=theano.config.floatX))
    # self.next_states_shared = theano.shared(
    #     np.zeros((batch_size, num_frames, input_height, input_width),
    #              dtype=theano.config.floatX))

    self.imgs_shared = theano.shared(
            np.zeros((batch_size, num_frames, width_img, height_img),
                     dtype=theano.config.floatX))
    self.next_imgs_shared = theano.shared(
            np.zeros((batch_size, num_frames, width_img, height_img),
                     dtype=theano.config.floatX))

    self.locs_shared = theano.shared(
            np.zeros((batch_size, num_frames, width_loc, height_loc),
                     dtype=theano.config.floatX))
    self.next_locs_shared = theano.shared(
            np.zeros((batch_size, num_frames, width_loc, height_loc),
                     dtype=theano.config.floatX))
    self.hiss_shared = theano.shared(
            np.zeros((batch_size, num_frames, width_his, height_his),
                     dtype=theano.config.floatX))
    self.next_hiss_shared = theano.shared(
            np.zeros((batch_size, num_frames, width_his, height_his),
                     dtype=theano.config.floatX))
    self.sds_shared = theano.shared(
        np.zeros((batch_size, 1), dtype='int32'),
        broadcastable=(False, True))
    self.next_sds_shared= theano.shared(
        np.zeros((batch_size, 1), dtype='int32'),
        broadcastable=(False, True))

    self.rewards_shared = theano.shared(
        np.zeros((batch_size, 1), dtype=theano.config.floatX),
        broadcastable=(False, True))

    self.actions_shared = theano.shared(
        np.zeros((batch_size, 1), dtype='int32'),
        broadcastable=(False, True))

    self.terminals_shared = theano.shared(
        np.zeros((batch_size, 1), dtype='int32'),
        broadcastable=(False, True))
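
    # The shared variables hold the current minibatch so that the
    # compiled functions can read it through givens instead of
    # transferring the inputs on every call.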

    # Leftover from the original single-input network:
    # q_vals = lasagne.layers.get_output(self.l_out, states / input_scale)

    # massage/unpack states into the right form for the multi-input network
    q_vals = lasagne.layers.get_output(self.l_out, {'l_in': imgs,
                                                    'l_loc1': locs,
                                                    'l_his': hiss,
                                                    'l_dis': sds})
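    # NB: lasagne.layers.get_output expects the keys of this dict to be
    # the Layer instances themselves, not strings; with string keys the
    # InputLayers keep their own implicit input_var placeholders, so
    # imgs/locs/hiss/sds may never enter the graph.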

    if self.freeze_interval > 0:
        # Leftover from the original single-input network:
        # next_q_vals = lasagne.layers.get_output(self.next_l_out,
        #                                         next_states / input_scale)
        next_q_vals = lasagne.layers.get_output(self.next_l_out,
                                                {'l_in': next_imgs,
                                                 'l_loc1': next_locs,
                                                 'l_his': next_hiss,
                                                 'l_dis': next_sds})
    else:
        next_q_vals = lasagne.layers.get_output(self.l_out,
                                                {'l_in': next_imgs,
                                                 'l_loc1': next_locs,
                                                 'l_his': next_hiss,
                                                 'l_dis': next_sds})

        next_q_vals = theano.gradient.disconnected_grad(next_q_vals)

    target = (rewards +
              (T.ones_like(terminals) - terminals) *
              self.discount * T.max(next_q_vals, axis=1, keepdims=True))
    diff = target - q_vals[T.arange(batch_size),
                           actions.reshape((-1,))].reshape((-1, 1))

    if self.clip_delta > 0:
        # If we simply take the squared clipped diff as our loss,
        # then the gradient will be zero whenever the diff exceeds
        # the clip bounds. To avoid this, we extend the loss
        # linearly past the clip point to keep the gradient constant
        # in that regime.
        # 
        # This is equivalent to declaring d loss/d q_vals to be
        # equal to the clipped diff, then backpropagating from
        # there, which is what the DeepMind implementation does.
        quadratic_part = T.minimum(abs(diff), self.clip_delta)
        linear_part = abs(diff) - quadratic_part
        loss = 0.5 * quadratic_part ** 2 + self.clip_delta * linear_part
    else:
        loss = 0.5 * diff ** 2
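    # e.g. with clip_delta = 1 and diff = 3: quadratic_part = 1,
    # linear_part = 2, loss = 0.5 * 1 + 1 * 2 = 2.5, and the gradient
    # w.r.t. diff stays at the clip value 1 instead of dropping to zero.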

    if batch_accumulator == 'sum':
        loss = T.sum(loss)
    elif batch_accumulator == 'mean':
        loss = T.mean(loss)
    else:
        raise ValueError("Bad accumulator: {}".format(batch_accumulator))

    params = lasagne.layers.helper.get_all_params(self.l_out)  
    givens_train = {
        # states: self.states_shared,            # leftover, no longer used
        # next_states: self.next_states_shared,  # leftover, no longer used
        imgs: self.imgs_shared,
        next_imgs: self.next_imgs_shared,
        locs: self.locs_shared,
        next_locs: self.next_locs_shared,
        hiss: self.hiss_shared,
        next_hiss: self.next_hiss_shared,
        sds: self.sds_shared,
        next_sds: self.next_sds_shared,
        rewards: self.rewards_shared,
        actions: self.actions_shared,
        terminals: self.terminals_shared
    }
    givens_q_val = {
        imgs: self.imgs_shared,
        locs: self.locs_shared,
        hiss: self.hiss_shared,
        sds: self.sds_shared
        # rewards/actions/terminals are not needed to compute q_vals
    }
    if update_rule == 'deepmind_rmsprop':
        updates = deepmind_rmsprop(loss, params, self.lr, self.rho,
                                   self.rms_epsilon)
    elif update_rule == 'rmsprop':
        updates = lasagne.updates.rmsprop(loss, params, self.lr, self.rho,
                                          self.rms_epsilon)
    elif update_rule == 'sgd':
        updates = lasagne.updates.sgd(loss, params, self.lr)
    else:
        raise ValueError("Unrecognized update: {}".format(update_rule))

    if self.momentum > 0:
        updates = lasagne.updates.apply_momentum(updates, None,
                                                 self.momentum)

    self._train = theano.function([], [loss, q_vals], updates=updates,
                                  givens=givens_train)
    self._q_vals = theano.function([], q_vals,
                                   givens=givens_q_val)

def build_network(self, network_type, output_dim, num_frames, batch_size):
    if network_type == "myOwn":
        return self.build_myNetwork(output_dim, num_frames, batch_size)
    else:
        raise ValueError("Unrecognized network: {}".format(network_type))


def build_myNetwork(self, output_dim, num_frames, batch_size):

    from lasagne.layers import dnn
    l_in = lasagne.layers.InputLayer(
        shape=(batch_size, num_frames, self.width_img, self.height_img)
    )

    l_conv1 = dnn.Conv2DDNNLayer(
        l_in,
        num_filters=32,
        filter_size=(8, 8),
        stride=(3, 3),
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.HeUniform(),
        b=lasagne.init.Constant(.1)
    )

    l_conv2 = dnn.Conv2DDNNLayer(
        l_conv1,
        num_filters=64,
        filter_size=(4, 4),
        stride=(1, 1),
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.HeUniform(),
        b=lasagne.init.Constant(.1)
    )

    l_conv3 = dnn.Conv2DDNNLayer(
        l_conv2,
        num_filters=64,
        filter_size=(3, 3),
        stride=(1, 1),
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.HeUniform(),
        b=lasagne.init.Constant(.1)
    )

    l_loc1 = lasagne.layers.InputLayer(
        shape=(batch_size, num_frames, self.width_loc, self.height_loc)
    )

    n = 64
    l_loc2 = lasagne.layers.DenseLayer(l_loc1, num_units=n)



    #history = np.zeros((batch_size, num_frames, 4, 24*24), dtype=int)


    l_his = lasagne.layers.InputLayer(
        shape=(batch_size, num_frames, self.width_his, self.height_his)
    )

    l_his2 = lasagne.layers.DenseLayer(l_his, num_units=n)


    l_dis = lasagne.layers.InputLayer(
        shape=(batch_size, num_frames, self.target_dis_size)
    )

    l_dis2 = lasagne.layers.DenseLayer(l_dis, num_units=n)

    l_conv4 = lasagne.layers.ReshapeLayer(l_conv3, (batch_size, 1, -1))
    l_loc2 = lasagne.layers.ReshapeLayer(l_loc2, (batch_size,1,-1))
    l_his2 = lasagne.layers.ReshapeLayer(l_his2, (batch_size,1,-1))
    l_dis2 = lasagne.layers.ReshapeLayer(l_dis2, (batch_size,1,-1))
    l_merge = lasagne.layers.ElemwiseSumLayer((l_conv4,l_loc2, l_his2, l_dis2 ))
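    # With the 24x24 images used in main(), each branch comes out as
    # (batch_size, 1, 64): the conv stack shrinks 24 -> 6 -> 3 -> 1
    # spatially with 64 filters, and the dense branches each have
    # num_units=64, so the element-wise sum is shape-compatible.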

    print(l_conv4.output_shape)
    print(l_loc2.output_shape)
    print(l_his2.output_shape)
    print(l_dis2.output_shape)
    print(l_merge.output_shape)

    l_hidden1 = lasagne.layers.DenseLayer(
        l_merge,
        num_units=320,
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.HeUniform(),
        b=lasagne.init.Constant(.1)
    )
    #
    l_out = lasagne.layers.DenseLayer(
        l_hidden1,
        num_units=output_dim,
        nonlinearity=None,
        W=lasagne.init.HeUniform(),
        b=lasagne.init.Constant(.1)
    )
    return l_out

def train(self, imgs, next_imgs, locs, next_locs, hiss,
          next_hiss, sds, next_sds,
          actions, rewards, terminals):
    """
    Train one batch.

    Arguments:

    imgs - b x f x h x w numpy array, where b is batch size,
           f is num frames, h is height and w is width
    locs, hiss - analogous b x f x h x w numpy arrays
    next_imgs, next_locs, next_hiss - same shapes as above
    sds, next_sds - b x 1 numpy arrays of integers
    actions - b x 1 numpy array of integers
    rewards - b x 1 numpy array
    terminals - b x 1 numpy boolean array (currently ignored)

    Returns: average loss
    """
    self.imgs_shared.set_value(imgs)
    self.next_imgs_shared.set_value(next_imgs)
    self.locs_shared.set_value(locs)
    self.next_locs_shared.set_value(next_locs)
    self.hiss_shared.set_value(hiss)
    self.next_hiss_shared.set_value(next_hiss)
    self.sds_shared.set_value(sds)
    self.next_sds_shared.set_value(next_sds)

    # Leftover from the original single-input network:
    # self.states_shared.set_value(states)
    # self.next_states_shared.set_value(next_states)

    self.actions_shared.set_value(actions)
    self.rewards_shared.set_value(rewards)
    self.terminals_shared.set_value(terminals)
    if (self.freeze_interval > 0 and
        self.update_counter % self.freeze_interval == 0):
        self.reset_q_hat()
    loss, _ = self._train()
    self.update_counter += 1
    return np.sqrt(loss)

def q_vals(self, img, loc, his, sd):
    # Leftover from the original single-input network:
    # states = np.zeros((self.batch_size, self.num_frames,
    #                    self.input_height, self.input_width),
    #                   dtype=theano.config.floatX)
    # states[0, ...] = state
    # self.states_shared.set_value(states)

    imgs = np.zeros((self.batch_size, self.num_frames, self.height_img, 
                     self.width_img), dtype=theano.config.floatX)
    imgs[0, ...] = img
    locs = np.zeros((self.batch_size, self.num_frames, self.height_loc, 
                     self.width_loc), dtype=theano.config.floatX)
    locs[0, ...] = loc

    hiss = np.zeros((self.batch_size, self.num_frames, self.height_his, 
                     self.width_his), dtype=theano.config.floatX)

    hiss[0,...] = his

    sds = np.zeros((self.batch_size, self.num_frames, self.target_dis_size),
                    dtype='int32')

    sds[0, ...] = sd

    self.imgs_shared.set_value(imgs)
    self.locs_shared.set_value(locs)
    self.hiss_shared.set_value(hiss)
    self.sds_shared.set_value(sds)

    return self._q_vals()[0]

def choose_action(self, img, loc, his, sd, epsilon):
    if self.rng.rand() < epsilon:
        return self.rng.randint(0, self.num_actions)
    q_vals = self.q_vals(img, loc, his, sd)
    return np.argmax(q_vals)

def reset_q_hat(self):
    all_params = lasagne.layers.helper.get_all_param_values(self.l_out)
    lasagne.layers.helper.set_all_param_values(self.next_l_out, all_params)

def main():
    # Leftover call signature from the original single-input example:
    # net = DeepQLearner(84, 84, 16, 4, .99, .00025, .95, .95, 10000,
    #                    32, 'nature_cuda')

    width_img = 24
    height_img = 24
    width_loc = 1
    height_loc = 3
    width_his = width_img * height_img
    height_his = 4
    target_dis_size = 1
    num_actions = 9
    num_frames = 1
    discount = 0.99
    learning_rate = .00025
    rho = 0.95
    rms_epsilon = 0.95
    momentum = 0.95
    clip_delta = 1
    freeze_interval = 100
    batch_size = 100
    network_type = 'myOwn'
    update_rule = 'deepmind_rmsprop'
    batch_accumulator = 'sum'
    rng = np.random.RandomState(123456)

    net = DeepQLearner(width_img,
                       height_img,
                       width_loc,
                       height_loc,
                       width_his,
                       height_his,
                       target_dis_size,
                       num_actions, num_frames, discount, learning_rate, rho,
                       rms_epsilon, momentum, clip_delta, freeze_interval,
                       batch_size, network_type, update_rule,
                       batch_accumulator, rng)

if __name__ == '__main__':
    main()
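
For reference, my understanding of how a multi-input Lasagne network is normally driven through get_output and givens (a small self-contained sketch with made-up layer and variable names, independent of the code above):

    import lasagne
    import numpy as np
    import theano
    import theano.tensor as T

    a_var = T.matrix('a')
    b_var = T.matrix('b')

    l_a = lasagne.layers.InputLayer(shape=(None, 8))
    l_b = lasagne.layers.InputLayer(shape=(None, 8))
    l_sum = lasagne.layers.ElemwiseSumLayer((l_a, l_b))
    l_out = lasagne.layers.DenseLayer(l_sum, num_units=2)

    # get_output takes a dict keyed by the Layer objects themselves.
    out = lasagne.layers.get_output(l_out, {l_a: a_var, l_b: b_var})

    a_shared = theano.shared(np.zeros((4, 8), dtype=theano.config.floatX))
    b_shared = theano.shared(np.zeros((4, 8), dtype=theano.config.floatX))

    # Both a_var and b_var are now in the graph of `out`, so the givens
    # below compile without an UnusedInputError.
    f = theano.function([], out, givens={a_var: a_shared, b_var: b_shared})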
