
Adding remorseful prober #633

Merged
merged 8 commits into from
Jun 17, 2016
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion axelrod/strategies/_strategies.py
@@ -44,7 +44,8 @@
from .mindcontrol import MindController, MindWarper, MindBender
from .mindreader import MindReader, ProtectedMindReader, MirrorMindReader
from .oncebitten import OnceBitten, FoolMeOnce, ForgetfulFoolMeOnce, FoolMeForever
-from .prober import Prober, Prober2, Prober3, HardProber, NaiveProber
+from .prober import (Prober, Prober2, Prober3, HardProber,
+                     NaiveProber, RemorsefulProber)
from .punisher import Punisher, InversePunisher
from .qlearner import RiskyQLearner, ArrogantQLearner, HesitantQLearner, CautiousQLearner
from .rand import Random
@@ -157,6 +158,7 @@
Raider,
Random,
RandomHunter,
+RemorsefulProber,
Retaliate,
Retaliate2,
Retaliate3,
58 changes: 56 additions & 2 deletions axelrod/strategies/prober.py
@@ -1,5 +1,7 @@
from axelrod import Actions, Player, init_args, random_choice

+import random

C, D = Actions.C, Actions.D


@@ -143,7 +145,7 @@ class NaiveProber(Player):

name = 'Naive Prober'
classifier = {
-'memory_depth': 1, # Four-Vector = (1.,0.,1.,0.)
+'memory_depth': 1,
'stochastic': True,
'makes_use_of': set(),
'inspects_source': False,
@@ -171,9 +173,61 @@ def strategy(self, opponent):
# React to the opponent's last move
if opponent.history[-1] == D:
return D
-# Otherwise cooperate, defect with a small probability
+# Otherwise cooperate with probability 1 - self.p, defect with probability self.p
choice = random_choice(1 - self.p)
return choice

def __repr__(self):
return "%s: %s" % (self.name, round(self.p, 2))


class RemorsefulProber(NaiveProber):
"""
Like Naive Prober, but if the opponent responds to a random defection with a
defection of their own, this player is remorseful and cooperates.

For reference see: "Engineering Design of Strategies for Winning
Iterated Prisoner's Dilemma Competitions" by Jiawei Li, Philip Hingston,
and Graham Kendall. IEEE Transactions on Computational Intelligence and
AI in Games, Vol. 3, No. 4, December 2011.

A fuller description is given in The Selfish Gene:
https://books.google.co.uk/books?id=ekonDAAAQBAJ&pg=PA273&lpg=PA273&dq=remorseful+prober&source=bl&ots=kAeYRYg7GB&sig=RD5-XtDAxzTF9rxRZEWyFjwuKhc&hl=en&sa=X&ved=0ahUKEwiFg--H_qvNAhWXF8AKHQVTAzcQ6AEIKDAC#v=onepage&q=remorseful%20prober&f=false
"""

Member:
That url can be shortened to just https://books.google.co.uk/books?id=ekonDAAAQBAJ

Member Author:
Good shout, I'll give this a general clean up now :)

name = 'Remorseful Prober'
classifier = {
'memory_depth': 2, # It remembers if its previous move was random
'stochastic': True,
'makes_use_of': set(),
'inspects_source': False,
'manipulates_source': False,
'manipulates_state': False
}

def __init__(self, p=0.1):
NaiveProber.__init__(self, p)
self.probing = False

def strategy(self, opponent):
# First move
if len(self.history) == 0:
return C
# React to the opponent's last move
if opponent.history[-1] == D:
if self.probing:
self.probing = False
return C
return D

# Otherwise cooperate with probability 1 - self.p
if random.random() < 1 - self.p:
self.probing = False
return C

self.probing = True
return D

def reset(self):
Player.reset(self)
self.probing = False
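
The remorse mechanic can be exercised by hand in the same style as the unit tests below: histories are assigned directly and the seed is fixed so the first probe is deterministic. A sketch:

import random

import axelrod

C, D = axelrod.Actions.C, axelrod.Actions.D

player = axelrod.RemorsefulProber(0.4)
opponent = axelrod.Random()

random.seed(0)
player.history = [C]
opponent.history = [C]
print(player.strategy(opponent))  # D: a random probe, player.probing is now True

player.history = [C, D]
opponent.history = [C, D]
print(player.strategy(opponent))  # C: the probe was punished, so show remorse
print(player.probing)             # False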
68 changes: 66 additions & 2 deletions axelrod/tests/unit/test_prober.py
@@ -1,8 +1,7 @@
"""Tests for prober strategies."""

-import random
import axelrod
+import random

from .test_player import TestPlayer, test_responses

@@ -162,3 +161,68 @@ def test_reduction_to_TFT(self):
test_responses(self, player, opponent, [C], [D], [D])
test_responses(self, player, opponent, [C, D], [D, C], [C])
test_responses(self, player, opponent, [C, D], [D, D], [D])


class TestRemorsefulProber(TestPlayer):

name = "Remorseful Prober: 0.1"
player = axelrod.RemorsefulProber
expected_classifier = {
'memory_depth': 2,
'stochastic': True,
'makes_use_of': set(),
'inspects_source': False,
'manipulates_source': False,
'manipulates_state': False
}

def test_strategy(self):
"Randomly defects (probes) and always retaliates like tit for tat."
self.first_play_test(C)

player = self.player(0.4)
opponent = axelrod.Random()
player.history = [C, C]
opponent.history = [C, D]
self.assertEqual(player.strategy(opponent), D)

def test_random_defection(self):
# Random defection
player = self.player(0.4)
opponent = axelrod.Random()
test_responses(self, player, opponent, [C], [C], [D], random_seed=1)

def test_remorse(self):
"""After probing, if opponent retaliates, will offer a C"""
player = self.player(0.4)
opponent = axelrod.Random()

random.seed(0)
player.history = [C]
opponent.history = [C]
self.assertEqual(player.strategy(opponent), D) # Random defection
self.assertEqual(player.probing, True)

player.history = [C, D]
opponent.history = [C, D]
self.assertEqual(player.strategy(opponent), C) # Remorse
self.assertEqual(player.probing, False)

player.history = [C, D, C]
opponent.history = [C, D, D]
self.assertEqual(player.strategy(opponent), D)
self.assertEqual(player.probing, False)

def test_reduction_to_TFT(self):
player = self.player(0)
opponent = axelrod.Random()
test_responses(self, player, opponent, [C], [C], [C], random_seed=1)
test_responses(self, player, opponent, [C], [D], [D])
test_responses(self, player, opponent, [C, D], [D, C], [C])
Member:
You can use e.g. attrs = {'probing': False} with test_responses and responses_test to check that self.probing is appropriately set (also above).

Member Author:
Yeah, I tried the various helper functions with no success, and I was getting frustrated over whether I was making mistakes in the strategy or misusing the helpers, so it felt easier to use the verbose calls...

I'll have another go. It might just have been me getting myself confused.

test_responses(self, player, opponent, [C, D], [D, D], [D])

def test_reset_probing(self):
player = self.player(0.4)
player.probing = True
player.reset()
self.assertFalse(player.probing)
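
For reference, the attrs-based form the reviewer suggests might look roughly like this (a sketch only: it assumes test_responses forwards an attrs dict whose entries are asserted on the player once the responses have been checked, and the seeds are illustrative):

# Hypothetical rewrite of test_remorse using the suggested attrs keyword.
def test_remorse(self):
    """After probing, if opponent retaliates, will offer a C."""
    player = self.player(0.4)
    opponent = axelrod.Random()
    test_responses(self, player, opponent, [C], [C], [D],
                   random_seed=0, attrs={'probing': True})
    test_responses(self, player, opponent, [C, D], [C, D], [C],
                   random_seed=0, attrs={'probing': False})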
2 changes: 1 addition & 1 deletion docs/tutorials/advanced/classification_of_strategies.rst
@@ -24,7 +24,7 @@ This allows us to, for example, quickly identify all the stochastic
strategies::

>>> len([s for s in axl.strategies if s().classifier['stochastic']])
-40
+41
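
The count rises from 40 to 41 because the new strategy is classified as stochastic, which can be checked directly (assuming the doc's usual axl alias)::

>>> import axelrod as axl
>>> axl.RemorsefulProber().classifier['stochastic']
True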

Member:
Doesn't line 33 need to change too?

Member Author:
No, this strategy has memory depth 2, as it needs to remember whether a 'probe' was punished (so that it can be remorseful).

Member:
Ah yes, I was looking at the memory depth for Naive Prober by mistake.

Member Author:
👍

Or indeed find out how many strategies only use 1 turn worth of memory to
make a decision::
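
A sketch of the kind of query that sentence introduces (the resulting count depends on the library version, so the output is omitted)::

>>> len([s for s in axl.strategies if s().classifier['memory_depth'] == 1])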
12 changes: 6 additions & 6 deletions docs/tutorials/advanced/strategy_transformers.rst
@@ -104,13 +104,13 @@ The library includes the following transformers:
>>> ApologizingDefector = ApologyTransformer([D], [C])(axelrod.Defector)
>>> player = ApologizingDefector()

-You can pass any two sequences in. In this example the player would apologize
-after two consequtive rounds of `(D, C)`::
+  You can pass any two sequences in. In this example the player would apologize
+  after two consequtive rounds of `(D, C)`::

->>> import axelrod
->>> from axelrod.strategy_transformers import ApologyTransformer
->>> ApologizingDefector = ApologyTransformer([D, D], [C, C])(axelrod.Defector)
->>> player = ApologizingDefector()
+    >>> import axelrod
+    >>> from axelrod.strategy_transformers import ApologyTransformer
+    >>> ApologizingDefector = ApologyTransformer([D, D], [C, C])(axelrod.Defector)
+    >>> player = ApologizingDefector()

* :code:`DeadlockBreakingTransformer`: Attempts to break :code:`(D, C) -> (C, D)` deadlocks by cooperating::

Member:
Is this supposed to be here? It doesn't appear to be related to the new strategy.

Member Author:
I added this to this PR just to fix the indentation for the docs. It's unrelated.
