diff --git a/axelrod/strategies/_strategies.py b/axelrod/strategies/_strategies.py
index 014f89f1e..a521747af 100644
--- a/axelrod/strategies/_strategies.py
+++ b/axelrod/strategies/_strategies.py
@@ -44,7 +44,8 @@
 from .mindcontrol import MindController, MindWarper, MindBender
 from .mindreader import MindReader, ProtectedMindReader, MirrorMindReader
 from .oncebitten import OnceBitten, FoolMeOnce, ForgetfulFoolMeOnce, FoolMeForever
-from .prober import Prober, Prober2, Prober3, HardProber, NaiveProber
+from .prober import (Prober, Prober2, Prober3, HardProber,
+                     NaiveProber, RemorsefulProber)
 from .punisher import Punisher, InversePunisher
 from .qlearner import RiskyQLearner, ArrogantQLearner, HesitantQLearner, CautiousQLearner
 from .rand import Random
@@ -157,6 +158,7 @@
     Raider,
     Random,
     RandomHunter,
+    RemorsefulProber,
     Retaliate,
     Retaliate2,
     Retaliate3,
diff --git a/axelrod/strategies/prober.py b/axelrod/strategies/prober.py
index 3dbd21b8f..630d8784d 100644
--- a/axelrod/strategies/prober.py
+++ b/axelrod/strategies/prober.py
@@ -1,5 +1,7 @@
 from axelrod import Actions, Player, init_args, random_choice
 
+import random
+
 C, D = Actions.C, Actions.D
 
 
@@ -143,7 +145,7 @@ class NaiveProber(Player):
 
     name = 'Naive Prober'
     classifier = {
-        'memory_depth': 1, # Four-Vector = (1.,0.,1.,0.)
+        'memory_depth': 1,
         'stochastic': True,
         'makes_use_of': set(),
         'inspects_source': False,
@@ -171,9 +173,65 @@ def strategy(self, opponent):
         # React to the opponent's last move
         if opponent.history[-1] == D:
             return D
-        # Otherwise cooperate, defect with a small probability
+        # Otherwise cooperate, defecting with probability self.p
         choice = random_choice(1 - self.p)
         return choice
 
     def __repr__(self):
         return "%s: %s" % (self.name, round(self.p, 2))
+
+
+class RemorsefulProber(NaiveProber):
+    """
+    Like Naive Prober, but if the opponent answers a random defection with a
+    defection of their own, this player is remorseful and cooperates.
+
+    For reference see: "Engineering Design of Strategies for Winning
+    Iterated Prisoner's Dilemma Competitions" by Jiawei Li, Philip Hingston,
+    and Graham Kendall. IEEE Transactions on Computational Intelligence and
+    AI in Games, Vol. 3, No. 4, December 2011.
+
+    A more complete description is given in "The Selfish Gene"
+    (https://books.google.co.uk/books?id=ekonDAAAQBAJ):
+
+    "Remorseful Prober remembers whether it has just spontaneously defected, and
+    whether the result was prompt retaliation. If so, it 'remorsefully' allows
+    its opponent 'one free hit' without retaliating."
+ """ + + name = 'Remorseful Prober' + classifier = { + 'memory_depth': 2, # It remembers if it's previous move was random + 'stochastic': True, + 'makes_use_of': set(), + 'inspects_source': False, + 'manipulates_source': False, + 'manipulates_state': False + } + + def __init__(self, p=0.1): + NaiveProber.__init__(self, p) + self.probing = False + + def strategy(self, opponent): + # First move + if len(self.history) == 0: + return C + # React to the opponent's last move + if opponent.history[-1] == D: + if self.probing: + self.probing = False + return C + return D + + # Otherwise cooperate with probability 1 - self.p + if random.random() < 1 - self.p: + self.probing = False + return C + + self.probing = True + return D + + def reset(self): + Player.reset(self) + self.probing = False diff --git a/axelrod/tests/unit/test_prober.py b/axelrod/tests/unit/test_prober.py index 51389c9f7..66667237c 100644 --- a/axelrod/tests/unit/test_prober.py +++ b/axelrod/tests/unit/test_prober.py @@ -1,8 +1,7 @@ """Tests for prober strategies.""" -import random - import axelrod +import random from .test_player import TestPlayer, test_responses @@ -162,3 +161,72 @@ def test_reduction_to_TFT(self): test_responses(self, player, opponent, [C], [D], [D]) test_responses(self, player, opponent, [C, D], [D, C], [C]) test_responses(self, player, opponent, [C, D], [D, D], [D]) + + +class TestRemorsefulProber(TestPlayer): + + name = "Remorseful Prober: 0.1" + player = axelrod.RemorsefulProber + expected_classifier = { + 'memory_depth': 2, + 'stochastic': True, + 'makes_use_of': set(), + 'inspects_source': False, + 'manipulates_source': False, + 'manipulates_state': False + } + + def test_strategy(self): + "Randomly defects (probes) and always retaliates like tit for tat." + self.first_play_test(C) + + player = self.player(0.4) + opponent = axelrod.Random() + player.history = [C, C] + opponent.history = [C, D] + self.assertEqual(player.strategy(opponent), D) + + def test_random_defection(self): + # Random defection + player = self.player(0.4) + opponent = axelrod.Random() + test_responses(self, player, opponent, [C], [C], [D], random_seed=1) + + def test_remorse(self): + """After probing, if opponent retaliates, will offer a C""" + player = self.player(0.4) + opponent = axelrod.Random() + + random.seed(0) + player.history = [C] + opponent.history = [C] + self.assertEqual(player.strategy(opponent), D) # Random defection + self.assertEqual(player.probing, True) + + player.history = [C, D] + opponent.history = [C, D] + self.assertEqual(player.strategy(opponent), C) # Remorse + self.assertEqual(player.probing, False) + + player.history = [C, D, C] + opponent.history = [C, D, D] + self.assertEqual(player.strategy(opponent), D) + self.assertEqual(player.probing, False) + + def test_reduction_to_TFT(self): + player = self.player(0) + opponent = axelrod.Random() + test_responses(self, player, opponent, [C], [C], [C], random_seed=1, + attrs={'probing': False}) + test_responses(self, player, opponent, [C], [D], [D], + attrs={'probing': False}) + test_responses(self, player, opponent, [C, D], [D, C], [C], + attrs={'probing': False}) + test_responses(self, player, opponent, [C, D], [D, D], [D], + attrs={'probing': False}) + + def test_reset_probing(self): + player = self.player(0.4) + player.probing = True + player.reset() + self.assertFalse(player.probing) diff --git a/docs/tutorials/advanced/classification_of_strategies.rst b/docs/tutorials/advanced/classification_of_strategies.rst index a107dd463..c9408226d 100644 --- 
--- a/docs/tutorials/advanced/classification_of_strategies.rst
+++ b/docs/tutorials/advanced/classification_of_strategies.rst
@@ -24,7 +24,7 @@ This allows us to, for example, quickly identify all the stochastic
 strategies::
 
     >>> len([s for s in axl.strategies if s().classifier['stochastic']])
-    40
+    41
 
 Or indeed find out how many strategy only use 1 turn worth of memory to
 make a decision::
diff --git a/docs/tutorials/advanced/strategy_transformers.rst b/docs/tutorials/advanced/strategy_transformers.rst
index a5bbc9a28..bce6c058b 100644
--- a/docs/tutorials/advanced/strategy_transformers.rst
+++ b/docs/tutorials/advanced/strategy_transformers.rst
@@ -104,13 +104,13 @@ The library includes the following transformers:
      >>> ApologizingDefector = ApologyTransformer([D], [C])(axelrod.Defector)
      >>> player = ApologizingDefector()
 
-You can pass any two sequences in. In this example the player would apologize
-after two consequtive rounds of `(D, C)`::
+  You can pass any two sequences in. In this example the player would apologize
+  after two consecutive rounds of `(D, C)`::
 
-    >>> import axelrod
-    >>> from axelrod.strategy_transformers import ApologyTransformer
-    >>> ApologizingDefector = ApologyTransformer([D, D], [C, C])(axelrod.Defector)
-    >>> player = ApologizingDefector()
+      >>> import axelrod
+      >>> from axelrod.strategy_transformers import ApologyTransformer
+      >>> ApologizingDefector = ApologyTransformer([D, D], [C, C])(axelrod.Defector)
+      >>> player = ApologizingDefector()
 
 * :code:`DeadlockBreakingTransformer`: Attempts to break
   :code:`(D, C) -> (C, D)` deadlocks by cooperating::
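
As a quick illustration of the remorse mechanic, the new strategy can be driven
by hand in the same way the unit tests above do. This is a minimal sketch, not
part of the patch, reusing the seed and `p=0.4` from `test_remorse`::

    >>> import random
    >>> import axelrod
    >>> from axelrod import Actions
    >>> C, D = Actions.C, Actions.D
    >>> random.seed(0)
    >>> player = axelrod.RemorsefulProber(0.4)
    >>> opponent = axelrod.Random()
    >>> player.history, opponent.history = [C], [C]
    >>> player.strategy(opponent)  # spontaneous defection: a probe
    'D'
    >>> player.probing
    True
    >>> player.history, opponent.history = [C, D], [C, D]
    >>> player.strategy(opponent)  # the opponent retaliated, so be remorseful
    'C'
    >>> player.probing
    False

With `p=0` the probing branch is never taken and the player behaves exactly
like Tit For Tat, which is what `test_reduction_to_TFT` verifies.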