forked from QuantEcon/QuantEcon.py
-
Notifications
You must be signed in to change notification settings - Fork 0
/
bimatrix_generators.py
509 lines (443 loc) · 18.7 KB
/
bimatrix_generators.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
"""
This module contains functions that generate NormalFormGame instances of
the 2-player games studied by Fearnley, Igwe, and Savani (2015):
* Colonel Blotto Games (`blotto_game`): A non-zero sum extension of the
Blotto game as studied by Hortala-Vallve and Llorente-Saguer (2012),
where opposing parties have asymmetric and heterogeneous battlefield
valuations.
* Ranking Games (`ranking_game`): In these games, as studied by Goldberg
et al. (2013), each player chooses an effort level associated with a
cost and a score. The players are ranked according to their scores,
and the player with the higher score wins the prize. Each player's
payoff is given by the value of the prize minus the cost of the
effort.
* SGC Games (`sgc_game`): These games were introduced by Sandholm,
Gilpin, and Conitzer (2005) as a worst case scenario for support
enumeration as it has a unique equilibrium where each player uses half
of his actions in his support.
* Tournament Games (`tournament_game`): These games are constructed by
Anbalagan et al. (2013) as games that do not have interim epsilon-Nash
equilibria with constant cardinaliry supports for epsilon smaller than
a certain threshold.
* Unit vector Games (`unit_vector_game`)
Large part of the code here is based on the C code available at
https://github.com/bimatrix-games/bimatrix-generators distributed under
BSD 3-Clause License.
References
----------
* Y. Anbalagan, S. Norin, R. Savani, and A. Vetta, "Polylogarithmic
Supports Are Required for Approximate Well-Supported Nash Equilibria
below 2/3," WINE, 2013.
* J. Fearnley, T. P. Igwe, R. Savani, "An Empirical Study of Finding
Approximate Equilibria in Bimatrix Games," International Symposium on
Experimental Algorithms (SEA), 2015.
* L. A. Goldberg, P. W. Goldberg, P. Krysta, and C. Ventre, "Ranking
Games that have Competitiveness-based Strategies", Theoretical
Computer Science, 2013.
* R. Hortala-Vallve and A. Llorente-Saguer, "Pure Strategy Nash
Equilibria in Non-Zero Sum Colonel Blotto Games", International
Journal of Game Theory, 2012.
* T. Sandholm, A. Gilpin, and V. Conitzer, "Mixed-Integer Programming
Methods for Finding Nash Equilibria," AAAI, 2005.
"""
# BSD 3-Clause License
#
# Copyright (c) 2015, John Fearnley, Tobenna P. Igwe, Rahul Savani
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# * Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import numpy as np
import scipy.special
from numba import jit
from ..normal_form_game import Player, NormalFormGame
from ...util import check_random_state
from ...gridtools import simplex_grid
from ...graph_tools import random_tournament_graph
from ...util.combinatorics import next_k_array, k_array_rank_jit
def blotto_game(h, T, rho, mu=0, random_state=None):
"""
Return a NormalFormGame instance of a 2-player non-zero sum Colonel
Blotto game (Hortala-Vallve and Llorente-Saguer, 2012), where the
players have an equal number `T` of troops to assign to `h` hills
(so that the number of actions for each player is equal to
(T+h-1) choose (h-1) = (T+h-1)!/(T!*(h-1)!)). Each player has a
value for each hill that he receives if he assigns strictly more
troops to the hill than his opponent (ties are broken uniformly at
random), where the values are drawn from a multivariate normal
distribution with covariance `rho`. Each player’s payoff is the sum
of the values of the hills won by that player.
Parameters
----------
h : scalar(int)
Number of hills.
T : scalar(int)
Number of troops.
rho : scalar(float)
Covariance of values of each hill. Must be in [-1, 1].
mu : scalar(float), optional(default=0)
Mean of values of each hill.
random_state : int or np.random.RandomState, optional
Random seed (integer) or np.random.RandomState instance to set
the initial state of the random number generator for
reproducibility. If None, a randomly initialized RandomState is
used.
Returns
-------
g : NormalFormGame
"""
actions = simplex_grid(h, T)
n = actions.shape[0]
payoff_arrays = tuple(np.empty((n, n)) for i in range(2))
mean = np.array([mu, mu])
cov = np.array([[1, rho], [rho, 1]])
random_state = check_random_state(random_state)
values = random_state.multivariate_normal(mean, cov, h)
_populate_blotto_payoff_arrays(payoff_arrays, actions, values)
g = NormalFormGame(
[Player(payoff_array) for payoff_array in payoff_arrays]
)
return g
@jit(nopython=True)
def _populate_blotto_payoff_arrays(payoff_arrays, actions, values):
"""
Populate the ndarrays in `payoff_arrays` with the payoff values of
the Blotto game with h hills and T troops.
Parameters
----------
payoff_arrays : tuple(ndarray(float, ndim=2))
Tuple of 2 ndarrays of shape (n, n), where n = (T+h-1)!/
(T!*(h-1)!). Modified in place.
actions : ndarray(int, ndim=2)
ndarray of shape (n, h) containing all possible actions, i.e.,
h-part compositions of T.
values : ndarray(float, ndim=2)
ndarray of shape (h, 2), where `values[k, :]` contains the
players' values of hill `k`.
"""
n, h = actions.shape
payoffs = np.empty(2)
for i in range(n):
for j in range(n):
payoffs[:] = 0
for k in range(h):
if actions[i, k] == actions[j, k]:
for p in range(2):
payoffs[p] += values[k, p] / 2
else:
winner = np.int(actions[i, k] < actions[j, k])
payoffs[winner] += values[k, winner]
payoff_arrays[0][i, j], payoff_arrays[1][j, i] = payoffs
def ranking_game(n, steps=10, random_state=None):
"""
Return a NormalFormGame instance of (the 2-player version of) the
"ranking game" studied by Goldberg et al. (2013), where each player
chooses an effort level associated with a score and a cost which are
both increasing functions with randomly generated step sizes. The
player with the higher score wins the first prize, whose value is 1,
and the other player obtains the "second prize" of value 0; in the
case of a tie, the first prize is split and each player receives a
value 0.5. The payoff of a player is given by the value of the prize
minus the cost of the effort.
Parameters
----------
n : scalar(int)
Positive integer determining the number of actions, i.e, effort
levels.
steps : scalar(int), optional(default=10)
Parameter determining the random step sizes for the scores and
costs for each player: The step sizes for the scores are drawn
from `1`, ..., `steps`, while those for the costs are multiples
of `1/(n*steps)`, where the cost of effort level `0` is 0, and
the maximum possible cost of effort level `n-1` is less than or
equal to 1.
random_state : int or np.random.RandomState, optional
Random seed (integer) or np.random.RandomState instance to set
the initial state of the random number generator for
reproducibility. If None, a randomly initialized RandomState is
used.
Returns
-------
g : NormalFormGame
Examples
--------
>>> g = ranking_game(5, random_state=1234)
>>> g.players[0]
Player([[ 0. , 0. , 0. , 0. , 0. ],
[ 0.82, -0.18, -0.18, -0.18, -0.18],
[ 0.8 , 0.8 , -0.2 , -0.2 , -0.2 ],
[ 0.68, 0.68, 0.68, -0.32, -0.32],
[ 0.66, 0.66, 0.66, 0.66, -0.34]])
>>> g.players[1]
Player([[ 1. , 0. , 0. , 0. , 0. ],
[ 0.8 , 0.8 , -0.2 , -0.2 , -0.2 ],
[ 0.66, 0.66, 0.66, -0.34, -0.34],
[ 0.6 , 0.6 , 0.6 , 0.6 , -0.4 ],
[ 0.58, 0.58, 0.58, 0.58, 0.58]])
"""
payoff_arrays = tuple(np.empty((n, n)) for i in range(2))
random_state = check_random_state(random_state)
scores = random_state.randint(1, steps+1, size=(2, n))
np.cumsum(scores, axis=1, out=scores)
costs = np.empty((2, n-1))
costs[:] = random_state.randint(1, steps+1, size=(2, n-1))
np.cumsum(costs, axis=1, out=costs)
costs[:] /= (n * steps)
_populate_ranking_payoff_arrays(payoff_arrays, scores, costs)
g = NormalFormGame(
[Player(payoff_array) for payoff_array in payoff_arrays]
)
return g
@jit(nopython=True)
def _populate_ranking_payoff_arrays(payoff_arrays, scores, costs):
"""
Populate the ndarrays in `payoff_arrays` with the payoff values of
the ranking game with scores and costs.
Parameters
----------
payoff_arrays : tuple(ndarray(float, ndim=2))
Tuple of 2 ndarrays of shape (n, n). Modified in place.
scores : ndarray(int, ndim=2)
ndarray of shape (2, n) containing score values corresponding to
the effort levels for the two players.
costs : ndarray(float, ndim=2)
ndarray of shape (2, n-1) containing cost values corresponding
to the n-1 positive effort levels for the two players, with the
assumption that the cost of the zero effort action is zero.
"""
n = payoff_arrays[0].shape[0]
for p, payoff_array in enumerate(payoff_arrays):
payoff_array[0, :] = 0
for i in range(1, n):
for j in range(n):
payoff_array[i, j] = -costs[p, i-1]
prize = 1.
for i in range(n):
for j in range(n):
if scores[0, i] > scores[1, j]:
payoff_arrays[0][i, j] += prize
elif scores[0, i] < scores[1, j]:
payoff_arrays[1][j, i] += prize
else:
payoff_arrays[0][i, j] += prize / 2
payoff_arrays[1][j, i] += prize / 2
def sgc_game(k):
"""
Return a NormalFormGame instance of the 2-player game introduced by
Sandholm, Gilpin, and Conitzer (2005), which has a unique Nash
equilibrium, where each player plays half of the actions with
positive probabilities. Payoffs are normalized so that the minimum
and the maximum payoffs are 0 and 1, respectively.
Parameters
----------
k : scalar(int)
Positive integer determining the number of actions. The returned
game will have `4*k-1` actions for each player.
Returns
-------
g : NormalFormGame
Examples
--------
>>> g = sgc_game(2)
>>> g.players[0]
Player([[ 0.75, 0.5 , 1. , 0.5 , 0.5 , 0.5 , 0.5 ],
[ 1. , 0.75, 0.5 , 0.5 , 0.5 , 0.5 , 0.5 ],
[ 0.5 , 1. , 0.75, 0.5 , 0.5 , 0.5 , 0.5 ],
[ 0. , 0. , 0. , 0.75, 0. , 0. , 0. ],
[ 0. , 0. , 0. , 0. , 0.75, 0. , 0. ],
[ 0. , 0. , 0. , 0. , 0. , 0.75, 0. ],
[ 0. , 0. , 0. , 0. , 0. , 0. , 0.75]])
>>> g.players[1]
Player([[ 0.75, 0.5 , 1. , 0.5 , 0.5 , 0.5 , 0.5 ],
[ 1. , 0.75, 0.5 , 0.5 , 0.5 , 0.5 , 0.5 ],
[ 0.5 , 1. , 0.75, 0.5 , 0.5 , 0.5 , 0.5 ],
[ 0. , 0. , 0. , 0. , 0.75, 0. , 0. ],
[ 0. , 0. , 0. , 0.75, 0. , 0. , 0. ],
[ 0. , 0. , 0. , 0. , 0. , 0. , 0.75],
[ 0. , 0. , 0. , 0. , 0. , 0.75, 0. ]])
"""
payoff_arrays = tuple(np.empty((4*k-1, 4*k-1)) for i in range(2))
_populate_sgc_payoff_arrays(payoff_arrays)
g = NormalFormGame(
[Player(payoff_array) for payoff_array in payoff_arrays]
)
return g
@jit(nopython=True)
def _populate_sgc_payoff_arrays(payoff_arrays):
"""
Populate the ndarrays in `payoff_arrays` with the payoff values of
the SGC game.
Parameters
----------
payoff_arrays : tuple(ndarray(float, ndim=2))
Tuple of 2 ndarrays of shape (4*k-1, 4*k-1). Modified in place.
"""
n = payoff_arrays[0].shape[0] # 4*k-1
m = (n+1)//2 - 1 # 2*k-1
for payoff_array in payoff_arrays:
for i in range(m):
for j in range(m):
payoff_array[i, j] = 0.75
for j in range(m, n):
payoff_array[i, j] = 0.5
for i in range(m, n):
for j in range(n):
payoff_array[i, j] = 0
payoff_array[0, m-1] = 1
payoff_array[0, 1] = 0.5
for i in range(1, m-1):
payoff_array[i, i-1] = 1
payoff_array[i, i+1] = 0.5
payoff_array[m-1, m-2] = 1
payoff_array[m-1, 0] = 0.5
k = (m+1)//2
for h in range(k):
i, j = m + 2*h, m + 2*h
payoff_arrays[0][i, j] = 0.75
payoff_arrays[0][i+1, j+1] = 0.75
payoff_arrays[1][j, i+1] = 0.75
payoff_arrays[1][j+1, i] = 0.75
def tournament_game(n, k, random_state=None):
"""
Return a NormalFormGame instance of the 2-player win-lose game,
whose payoffs are either 0 or 1, introduced by Anbalagan et al.
(2013). Player 0 has n actions, which constitute the set of nodes
{0, ..., n-1}, while player 1 has n choose k actions, each
corresponding to a subset of k elements of the set of n nodes. Given
a randomly generated tournament graph on the n nodes, the payoff for
player 0 is 1 if, in the tournament, the node chosen by player 0
dominates all the nodes in the k-subset chosen by player 1. The
payoff for player 1 is 1 if player 1's k-subset contains player 0's
node.
Parameters
----------
n : scalar(int)
Positive integer determining the number of nodes in the
tournament graph.
k : scalar(int)
Positive integer determining the size of subsets of nodes in the
tournament graph.
random_state : int or np.random.RandomState, optional
Random seed (integer) or np.random.RandomState instance to set
the initial state of the random number generator for
reproducibility. If None, a randomly initialized RandomState is
used.
Returns
-------
g : NormalFormGame
Notes
-----
The actions of player 1 are ordered according to the combinatorial
number system [1]_, different from the order used in the original
library in C.
Examples
--------
>>> g = tournament_game(5, 2, random_state=1234)
>>> g.players[0].payoff_array
array([[ 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
[ 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
[ 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[ 0., 1., 0., 1., 0., 1., 0., 0., 0., 0.]])
>>> g.players[1].payoff_array
array([[ 1., 1., 0., 0., 0.],
[ 1., 0., 1., 0., 0.],
[ 0., 1., 1., 0., 0.],
[ 1., 0., 0., 1., 0.],
[ 0., 1., 0., 1., 0.],
[ 0., 0., 1., 1., 0.],
[ 1., 0., 0., 0., 1.],
[ 0., 1., 0., 0., 1.],
[ 0., 0., 1., 0., 1.],
[ 0., 0., 0., 1., 1.]])
References
----------
.. [1] `Combinatorial number system
<https://en.wikipedia.org/wiki/Combinatorial_number_system>`_,
Wikipedia.
"""
m = scipy.special.comb(n, k, exact=True)
if m > np.iinfo(np.intp).max:
raise ValueError('Maximum allowed size exceeded')
payoff_arrays = tuple(np.zeros(shape) for shape in [(n, m), (m, n)])
tourn = random_tournament_graph(n, random_state=random_state)
indices, indptr = tourn.csgraph.indices, tourn.csgraph.indptr
_populate_tournament_payoff_array0(payoff_arrays[0], k, indices, indptr)
_populate_tournament_payoff_array1(payoff_arrays[1], k)
g = NormalFormGame(
[Player(payoff_array) for payoff_array in payoff_arrays]
)
return g
@jit(nopython=True)
def _populate_tournament_payoff_array0(payoff_array, k, indices, indptr):
"""
Populate `payoff_array` with the payoff values for player 0 in the
tournament game given a random tournament graph in CSR format.
Parameters
----------
payoff_array : ndarray(float, ndim=2)
ndarray of shape (n, m), where m = n choose k, prefilled with
zeros. Modified in place.
k : scalar(int)
Size of the subsets of nodes.
indices : ndarray(int, ndim=1)
CSR format index array of the adjacency matrix of the tournament
graph.
indptr : ndarray(int, ndim=1)
CSR format index pointer array of the adjacency matrix of the
tournament graph.
"""
n = payoff_array.shape[0]
X = np.empty(k, dtype=np.int_)
a = np.empty(k, dtype=np.int_)
for i in range(n):
d = indptr[i+1] - indptr[i]
if d >= k:
for j in range(k):
a[j] = j
while a[-1] < d:
for j in range(k):
X[j] = indices[indptr[i]+a[j]]
payoff_array[i, k_array_rank_jit(X)] = 1
a = next_k_array(a)
@jit(nopython=True)
def _populate_tournament_payoff_array1(payoff_array, k):
"""
Populate `payoff_array` with the payoff values for player 1 in the
tournament game.
Parameters
----------
payoff_array : ndarray(float, ndim=2)
ndarray of shape (m, n), where m = n choose k, prefilled with
zeros. Modified in place.
k : scalar(int)
Size of the subsets of nodes.
"""
m = payoff_array.shape[0]
X = np.arange(k)
for j in range(m):
for i in range(k):
payoff_array[j, X[i]] = 1
X = next_k_array(X)