diff --git a/week04_approx_rl/homework_pytorch_main.ipynb b/week04_approx_rl/homework_pytorch_main.ipynb
index 537d6b633..232578753 100644
--- a/week04_approx_rl/homework_pytorch_main.ipynb
+++ b/week04_approx_rl/homework_pytorch_main.ipynb
@@ -891,7 +891,7 @@
     "N_STEPS = 100\n",
     "\n",
     "exp_replay = ReplayBuffer(REPLAY_BUFFER_SIZE)\n",
-    "for i in range(REPLAY_BUFFER_SIZE // N_STEPS)):\n",
+    "for i in range(REPLAY_BUFFER_SIZE // N_STEPS):\n",
     "    if not utils.is_enough_ram(min_available_gb=0.1):\n",
     "        print(\"\"\"\n",
     "            Less than 100 Mb RAM available. \n",
@@ -991,7 +991,7 @@
     "\n",
     "    if step % loss_freq == 0:\n",
     "        td_loss_history.append(loss.data.cpu().item())\n",
-    "        grad_norm_history.append(grad_norm)\n",
+    "        grad_norm_history.append(grad_norm.cpu())\n",
     "\n",
     "    if step % refresh_target_network_freq == 0:\n",
     "        # Load agent weights into target_network\n",
diff --git a/week04_approx_rl/seminar_pytorch.ipynb b/week04_approx_rl/seminar_pytorch.ipynb
index ab3dbb109..9161a92ec 100644
--- a/week04_approx_rl/seminar_pytorch.ipynb
+++ b/week04_approx_rl/seminar_pytorch.ipynb
@@ -176,18 +176,18 @@
     "def compute_td_loss(states, actions, rewards, next_states, is_done, gamma=0.99, check_shapes=False):\n",
     "    \"\"\" Compute td loss using torch operations only. Use the formula above. \"\"\"\n",
     "    states = torch.tensor(\n",
-    "        states, dtype=torch.float32) # shape: [batch_size, state_size]\n",
-    "    actions = torch.tensor(actions, dtype=torch.long) # shape: [batch_size]\n",
-    "    rewards = torch.tensor(rewards, dtype=torch.float32) # shape: [batch_size]\n",
+    "        states, dtype=torch.float32)    # shape: [batch_size, state_size]\n",
+    "    actions = torch.tensor(actions, dtype=torch.long)    # shape: [batch_size]\n",
+    "    rewards = torch.tensor(rewards, dtype=torch.float32)  # shape: [batch_size]\n",
     "    # shape: [batch_size, state_size]\n",
     "    next_states = torch.tensor(next_states, dtype=torch.float32)\n",
-    "    is_done = torch.tensor(is_done, dtype=torch.uint8) # shape: [batch_size]\n",
+    "    is_done = torch.tensor(is_done, dtype=torch.uint8)    # shape: [batch_size]\n",
     "\n",
     "    # get q-values for all actions in current states\n",
-    "    predicted_qvalues = network(states)\n",
+    "    predicted_qvalues = network(states)  # shape: [batch_size, n_actions]\n",
     "\n",
     "    # select q-values for chosen actions\n",
-    "    predicted_qvalues_for_actions = predicted_qvalues[\n",
+    "    predicted_qvalues_for_actions = predicted_qvalues[  # shape: [batch_size]\n",
     "        range(states.shape[0]), actions\n",
     "    ]\n",
     "\n",