-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
36 lines (32 loc) · 1.15 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import gym
import numpy as np
import take5
# Baseline roll-out script for the Take5 environment: plays a fixed number of
# episodes with a hand-written policy and prints summary statistics of the
# per-episode return.

# Policy switch: when True, each step plays the argmax of the player's hand;
# when False, the step index itself is used as the action.
TAKE_LARGEST = False
config = {"sides": 5, "multi_agent": False}
env = gym.make('Take5-v0', config=config)
episodes = 100
# Number of env.step() calls per episode.
# NOTE(review): presumably the number of cards in a hand — confirm against the
# take5 environment.
STEPS_PER_EPISODE = 10

returns = []
for _ in range(episodes):
    observation = env.reset()
    # Accumulate over the WHOLE episode. This must be initialised before the
    # step loop; resetting it inside the loop would leave only the final
    # step's reward in `returns`.
    episode_returns = 0
    for i in range(STEPS_PER_EPISODE):
        if config["multi_agent"]:
            # Multi-agent mode: the action is a dict keyed like the
            # observation dict, one entry per player.
            if TAKE_LARGEST:
                action = {
                    player: env.hands[p].argmax()
                    for p, player in enumerate(observation)
                }
            else:
                action = {player: i for player in observation}
        elif TAKE_LARGEST:
            action = env.hands[0].argmax()
        else:
            action = i

        # NOTE(review): `done` is not consulted; the loop always runs
        # STEPS_PER_EPISODE steps — confirm this matches the episode length.
        observation, reward, done, info = env.step(action)
        print(reward)
        if config["multi_agent"]:
            # Only the first entry of the reward dict is tracked.
            episode_returns += list(reward.values())[0]
        else:
            episode_returns += reward
        env.render()
    returns.append(episode_returns)

returns = np.array(returns)
print("Multi-agent: %r, take largest card policy: %r, Reward max: %f, mean: %f, min: %f" % (
    config["multi_agent"], TAKE_LARGEST, returns.max(), returns.mean(), returns.min()))