# It's simple, we kill the Pac Man
In order to create a new ghost agent, we need to declare a new class in `agents.py` that implements both the `learn` and `act` methods. The latter, `act`, is called every simulation step to send an action to the simulator. `learn` is called before `act` during the learning phase to make the agent improve from its experience.

For instance, let's create an agent that always walks North (assuming that `'North'` is a valid representation of an action that moves it upwards in a particular simulator). This class must go inside the simulation's `agents.py` file.
```python
from multiagentrl import core


class NorthAgent(core.BaseControllerAgent):
    def learn(self, state, action, reward):
        # This agent does not learn from experience
        pass

    def act(self, state, legal_actions, explore):
        return 'North'
```
That's it! This agent chooses the action `'North'` whenever requested.
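Since `NorthAgent` ignores its experience entirely, a minimal sketch may help show how `learn` is meant to be used. The hypothetical `GreedyAgent` below keeps a running reward average per action and exploits the best-known legal action when it is not exploring. Everything beyond the `learn`/`act` signatures is an assumption for illustration: it presumes actions are hashable, and it initializes its value table lazily instead of defining `__init__`, since the controller instantiates the class.

```python
import random

from multiagentrl import core


class GreedyAgent(core.BaseControllerAgent):
    # Initialized lazily so we don't have to assume the base class
    # constructor signature; maps action -> (reward sum, count)
    action_values = None

    def _values(self):
        if self.action_values is None:
            self.action_values = {}
        return self.action_values

    def learn(self, state, action, reward):
        # Accumulate the reward observed after executing this action
        total, count = self._values().get(action, (0.0, 0))
        self._values()[action] = (total + reward, count + 1)

    def act(self, state, legal_actions, explore):
        values = self._values()
        if explore or not values:
            return random.choice(legal_actions)

        def average(action):
            total, count = values.get(action, (0.0, 0))
            return total / count if count else 0.0

        # Exploit: pick the legal action with the best average reward
        return max(legal_actions, key=average)
```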
We also modify the simulation's `adapter.py` to send the agent class to the controller, informing it that we must launch one `NorthAgent` instance when the simulation starts.
```python
from multiagentrl import core, messages


class ExampleExperiment(core.BaseExperiment):
    def __init__(self, learn_games, test_games):
        super(ExampleExperiment, self).__init__(
            learn_games=learn_games,
            test_games=test_games)
        self.simulator = ExampleSimulator()
        # Constructor arguments here are illustrative placeholders; see
        # ExampleAgent below for their meaning
        self.agents = [ExampleAgent(agent_type='ghost', map_width=20,
                                    map_height=11)]

    def execute_game(self):
        # Send the first state before starting to learn
        [agent.send_state() for agent in self.agents]

        while not self.simulator.is_finished():
            # Receive an action for the current state
            actions = [agent.receive_action() for agent in self.agents]

            # Simulate one step
            self.simulator.step(actions)

            # Update state to learn from the received reward
            [agent.send_state() for agent in self.agents]

            # Get reward for executing the action and reaching the new state
            [agent.send_reward() for agent in self.agents]
```
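Note the message ordering in `execute_game`: each step sends the current state, receives the chosen action back, advances the simulator, then sends the new state followed by the reward. This is what lets the controller call `learn` with a complete state-action-reward triple before `act` is queried again. The `ExampleAgent` adapter, defined in the same `adapter.py` file, implements these messages: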
```python
class ExampleAgent(core.BaseAdapterAgent):
    def __init__(self, agent_type, map_width, map_height):
        super(ExampleAgent, self).__init__()
        self.agent_id = 0
        self.agent_type = agent_type
        self.map_width = map_width
        self.map_height = map_height

    def start_experiment(self):
        # Register this agent with the controller, telling it to
        # instantiate a NorthAgent
        message = messages.StartExperimentMessage(
            agent_id=self.agent_id,
            agent_team=self.agent_type,
            agent_class=NorthAgent,
            map_width=self.map_width,
            map_height=self.map_height)
        self.communicate(message)

    def finish_experiment(self):
        message = messages.FinishExperimentMessage(agent_id=self.agent_id)
        self.communicate(message)

    def start_game(self):
        message = messages.StartGameMessage(agent_id=self.agent_id)
        self.communicate(message)

    def finish_game(self):
        message = messages.FinishGameMessage(agent_id=self.agent_id)
        self.communicate(message)

    def send_state(self):
        message = messages.StateMessage(
            agent_id=self.agent_id,
            state=self.state,
            legal_actions=self.actions,
            explore=self.is_learning)
        return self.communicate(message)

    def receive_action(self):
        # The controller replies to a state message with the chosen action
        action_message = self.send_state()
        self.action = action_message.action
        return self.action

    def send_reward(self):
        if self.is_learning:
            message = messages.RewardMessage(
                agent_id=self.agent_id, state=self.state,
                action=self.action, reward=self.reward)
            self.communicate(message)
```
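To make the lifecycle concrete, a hypothetical driver could look like the sketch below. The real entry point is provided by the framework, so everything here besides the classes defined above is an assumption, including that `BaseExperiment` keeps `learn_games` and `test_games` as attributes.

```python
# Hypothetical usage sketch; the framework provides the real run loop
experiment = ExampleExperiment(learn_games=100, test_games=10)

for agent in experiment.agents:
    agent.start_experiment()

# Assumes BaseExperiment stores learn_games/test_games as attributes
for _ in range(experiment.learn_games + experiment.test_games):
    for agent in experiment.agents:
        agent.start_game()
    experiment.execute_game()
    for agent in experiment.agents:
        agent.finish_game()

for agent in experiment.agents:
    agent.finish_experiment()
```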
Now, our `NorthAgent` is running in the controller and will select the `'North'` action at every simulation step!