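'''Evaluation script (test.py): load a trained PPO agent from a checkpoint,
roll it out deterministically on the UAV environment (env_2022.UAVEnv),
save a trajectory plot for each episode, and report the mean episode reward.

Run with: python test.py (expects the checkpoint under ./checkpoint/ and
writes images to ./test_img/).
'''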
import os

import numpy as np
import torch
from matplotlib import pyplot as plt

import env_2022
from normalization import Normalization, RewardScaling
from ppo_continuous import PPO_continuous
from replaybuffer import ReplayBuffer
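# Note: Normalization, RewardScaling, ReplayBuffer and PPO_continuous are not
# referenced directly below. They are presumably kept because torch.load()
# unpickles objects of these classes, which requires their modules to be
# importable; the explicit imports make that dependency visible.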


def draw_trajectory(states, times):
    '''
    Draw the trajectory of the agent and save it as an image.
    input:  states - the whole state log of the episode (before normalization)
            times  - index of the evaluation episode (used in the file name)
    output: none
    '''
    save_dir = './test_img/'
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)
    save_path = save_dir + 'Evaluate_No_' + str(times) + '.png'
    # The target position is constant over the episode, so read it from the
    # first state; positions in the state vector appear to be scaled by 1/10,
    # so multiply by 10 to recover map coordinates.
    target_position = [states[0][3] * 10, states[0][4] * 10]
    # print(target_position)
    plt.scatter(target_position[0], target_position[1], s=4)
    agent_position = []
    for i in range(len(states)):
        agent_position.append([states[i][0] * 10, states[i][1] * 10])
    # print(states)
    x = np.array(agent_position)[:, 0]
    y = np.array(agent_position)[:, 1]
    plt.plot(x, y, 'r')
    plt.savefig(save_path)
    plt.cla()  # clear the axes so the next episode starts on a fresh figure
    print('Test ' + str(times) + '. Image saved.')


def main():
    times = 10
    evaluate_reward = 0
    seed = 10
    env = env_2022.UAVEnv(seed=seed)
    # state_norm = Normalization(shape=len(env.get_state()))
    checkpoint = './checkpoint/PPO_continuous_Beta_env_UAV_number_7_seed_10.pth.tar'
    state_dict = torch.load(checkpoint)
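    # The checkpoint bundles the trained agent together with the Normalization
    # object (the observation-normalization statistics from training), so
    # test-time observations are normalized exactly as the policy saw them.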
    # print(state_dict)
    agent = state_dict['agent']
    state_norm = state_dict['Normalization']
    for t in range(times):
        log_ep = []
        s = env.reset()
        log_ep.append(s)
        s = state_norm(s, update=False)  # during evaluation, update=False freezes the running statistics
        done = False
        episode_reward = 0
        step = 0
        pre_a = 0
        while not done:
            step += 1
            s = torch.unsqueeze(torch.tensor(s, dtype=torch.float), 0)
            # A fresh action is computed every 5 steps; in between, the
            # previous action is repeated (a simple action-repeat scheme).
            if step % 5 == 1:
                # Use the deterministic policy (the distribution mean) during evaluation
                a = agent.actor.mean(s).detach().numpy().flatten()
                # dist = actor.get_dist(s)
                # a = dist.sample().numpy().flatten()
            else:
                a = pre_a
            action = a
            s_, r, done, _ = env.step(action, step)
            log_ep.append(s_)  # log the raw (un-normalized) state for plotting
            # env.render()
            s_ = state_norm(s_, update=False)
            episode_reward += r
            s = s_
            pre_a = a
        print('episode ' + str(t) + ', reward = ' + str(episode_reward))
        draw_trajectory(log_ep, t)
        evaluate_reward += episode_reward
    return evaluate_reward / times


if __name__ == '__main__':
    reward = main()
    print("evaluate_reward: {}".format(reward))