diff --git a/2048.py b/2048.py
new file mode 100644
index 0000000..7457cba
--- /dev/null
+++ b/2048.py
@@ -0,0 +1,53 @@
+from agent import Agent
+from board import Board
+from analyze import Analyzer
+#from argparse import ArgumentParser
+
+TRAIN = True
+EPISODE = 50000
+MILESTONE = 1000
+
+
+if __name__ == "__main__":
+    Game = Board()
+    AI = Agent()
+    analysis = Analyzer()
+    if TRAIN:
+        totalR = 0
+        for e in range(EPISODE):
+            Game.initialize()
+            AI.Episode_begin()
+            while True:
+                act, r = AI.step(Game)
+                if r != -1:
+                    totalR += r
+                if Game.end_game():
+                    break
+                Game.GenRandTile(r)
+                if Game.end_game():
+                    break
+            AI.Episode_end()
+            analysis.eval(Game)
+            if e % MILESTONE == 0:
+                print("#Episode: {episode}, score: {score}".format(episode = e, score = totalR))
+                totalR = 0
+                analysis.printAnalysis(MILESTONE)
+                analysis.reset()
+                AI.save_tupleNet()
+        AI.save_tupleNet()
+
+    else:
+        for i in range(1000):
+            totalR = 0
+            Game.initialize()
+            while True:
+                act, r = AI.step(Game)
+                if r != -1:
+                    totalR += r
+                if Game.end_game():
+                    break
+                Game.GenRandTile(r)
+                if Game.end_game():
+                    break
+            #Game.printBoard()
+            print("Score: {}".format(totalR))
diff --git a/READ.md b/READ.md
deleted file mode 100644
index 8afafc1..0000000
--- a/READ.md
+++ /dev/null
@@ -1,23 +0,0 @@
-## Master the 2048 Game
-#### Train an AI to crack the game!
-
-* Live Demo
-    #TODO
-
-
-* How to Run?
->To Train your AI
->Let the AI Play the Game
-
-
-* N-Tuple Network
-    #TODO
-
-
-* Temporal Difference Learning
-    #TODO
-
-
-* Expectimax Search
-    #TODO
-
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..67343c1
--- /dev/null
+++ b/README.md
@@ -0,0 +1,78 @@
+# Master the 2048 Game
+### Train an AI to crack the game!
+
+### Live Demo
+    #TODO
+    #build up a web server to run the game on localhost
+
+### How to Run?
+> To train the AI: set `TRAIN = True` at the top of `2048.py`, then run `python 2048.py`.
+> To watch a trained AI play: set `TRAIN = False` and run `python 2048.py` again.
+
+
+### N-Tuple Network
+    #TODO
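+
+In brief: the value function is a lookup-table ("n-tuple") network rather than
+a neural net. `agent.py` keeps four tables, two 6-tuples covering 3x2 blocks of
+the board and two 4-tuples covering single columns, each indexed by tile
+exponents (empty = 0, the 2048 tile = 11) and summed over all 8 reflections and
+rotations of the board. A minimal sketch of a single lookup (illustrative only):
+
+```python
+import numpy as np
+
+# One 4-tuple table over a column; each axis indexes a tile exponent 0..23.
+table = np.zeros((24, 24, 24, 24), dtype=np.float64)
+column = (1, 2, 2, 3)   # a column holding tiles 2, 4, 4, 8
+value = table[column]   # the net sums such lookups: 4 tuples x 8 symmetries
+```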
+
+
+### Temporal Difference Learning
+    #TODO
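+
+In brief: `Episode_end` in `agent.py` replays each finished game backward and
+nudges every stored state's value toward a one-step target, with learning rate
+`alpha = 0.0025`. A worked sketch of the update rule it applies (toy values):
+
+```python
+alpha = 0.0025
+V = {'s1': 0.0, 's2': 0.0}   # toy value table keyed by state name
+
+# Terminal state: the target is 0.
+V['s2'] += alpha * (0 - V['s2'])
+
+# Earlier state: the target is the step reward plus the next state's value.
+reward = 4                   # e.g. two 2-tiles merged into a 4
+V['s1'] += alpha * (reward + V['s2'] - V['s1'])
+```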
+
+
+### Expectimax Search
+    #TODO
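+
+Not implemented yet. The idea: alternate max nodes (the player picks the move
+with the highest expected value) and chance nodes (the game drops a 2 or a 4
+on a random empty cell), cutting the search off at a fixed depth. A sketch,
+with `moves`, `spawns`, and `evaluate` as placeholders for the real board API:
+
+```python
+def expectimax(state, depth, moves, spawns, evaluate):
+    """moves(state) -> [(reward, next_state)]; spawns(state) -> [(prob, next_state)]."""
+    if depth == 0 or not moves(state):
+        return evaluate(state)
+    # Max node: the player picks the best move.
+    return max(r + chance(s, depth - 1, moves, spawns, evaluate)
+               for r, s in moves(state))
+
+def chance(state, depth, moves, spawns, evaluate):
+    # Chance node: average over all possible random tile spawns.
+    return sum(p * expectimax(s, depth, moves, spawns, evaluate)
+               for p, s in spawns(state))
+```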
+
+
+### Future Development
+> Implement BitBoard to speed up training
+> Try to implement a DQN to extract features and train
diff --git a/agent.py b/agent.py
new file mode 100644
index 0000000..47c4b47
--- /dev/null
+++ b/agent.py
@@ -0,0 +1,150 @@
+from board import Board
+import numpy as np
+import random
+import os
+
+
+class Agent():
+    def __init__(self):
+        self.episode = []
+        self.net = []
+        self.alpha = 0.0025
+        self.gamma = 1.0
+        if os.path.isfile("tupleNet/tuple1.npy"):
+            print("Found tuple network")
+            print("Loading...")
+            self.load_tupleNet("tupleNet/tuple")
+        else:
+            self.build_tupleNet()
+
+
+    def build_tupleNet(self):
+        # np.zeros rather than np.ndarray: np.ndarray would leave the
+        # tuple tables filled with uninitialized memory
+        self.net.append(np.zeros((24, 24, 24, 24, 24, 24), dtype=np.float64))
+        self.net.append(np.zeros((24, 24, 24, 24, 24, 24), dtype=np.float64))
+        self.net.append(np.zeros((24, 24, 24, 24), dtype=np.float64))
+        self.net.append(np.zeros((24, 24, 24, 24), dtype=np.float64))
+
+    def load_tupleNet(self, filename):
+        for i in range(4):
+            self.net.append(np.load(filename+str(i+1)+".npy"))
+
+    def save_tupleNet(self):
+        for i in range(4):
+            np.save("tupleNet/tuple%d" % (i+1), self.net[i])
+
+    def updateNet(self, tmp, TD_error):
+        self.net[0][tmp.getTile(0)][tmp.getTile(4)][tmp.getTile(8)][tmp.getTile(1)][tmp.getTile(5)][tmp.getTile(9)] += TD_error
+        self.net[1][tmp.getTile(1)][tmp.getTile(5)][tmp.getTile(9)][tmp.getTile(2)][tmp.getTile(6)][tmp.getTile(10)] += TD_error
+        self.net[2][tmp.getTile(2)][tmp.getTile(6)][tmp.getTile(10)][tmp.getTile(14)] += TD_error
+        self.net[3][tmp.getTile(3)][tmp.getTile(7)][tmp.getTile(11)][tmp.getTile(15)] += TD_error
+
+
+    def getV(self, b):
+        v = 0.0
+        tmp = Board()
+        for i in range(8):
+            tmp.copyBoard(b)
+            tmp.morphBoard(i)
+            v += self.net[0][tmp.getTile(0)][tmp.getTile(4)][tmp.getTile(8)][tmp.getTile(1)][tmp.getTile(5)][tmp.getTile(9)]
+            v += self.net[1][tmp.getTile(1)][tmp.getTile(5)][tmp.getTile(9)][tmp.getTile(2)][tmp.getTile(6)][tmp.getTile(10)]
+            v += self.net[2][tmp.getTile(2)][tmp.getTile(6)][tmp.getTile(10)][tmp.getTile(14)]
+            v += self.net[3][tmp.getTile(3)][tmp.getTile(7)][tmp.getTile(11)][tmp.getTile(15)]
+        return v
+
+
+
+
+    def Episode_begin(self):
+        self.episode = []
+
+    def Episode_end(self):
+        # backward TD(0) pass over the stored episode
+        last = True
+        while len(self.episode) > 0:
+            a = self.episode[-1]['after']
+            b = self.episode[-1]['before']
+            R = self.episode[-1]['reward']
+            S_, S = self.getV(a), self.getV(b)
+            tmp = Board()
+            for i in range(8):
+                tmp.copyBoard(b)
+                tmp.morphBoard(i)
+                if last == False:
+                    self.updateNet(tmp, self.alpha*(R + S_ - S))
+                else:
+                    self.updateNet(tmp, self.alpha*(0 - S))
+            last = False
+            del self.episode[-1]
+
+
+    def step(self, prev):
+        #action = random.randint(0, 3)
+        #reward = prev.move(action)
+        #return action, reward
+
+        maxV = float(-1e9)
+        maxOP = -1
+        tmp = Board()
+        for op in range(4):
+            tmp.copyBoard(prev)
+            r = tmp.move(op)
+            if r != -1:
+                v = self.getV(tmp)
+                if v+r >= maxV:
+                    maxV = v+r
+                    maxOP = op
+
+        if maxOP != -1:
+            r = prev.move(maxOP)
+            # snapshot the board: prev is mutated in place every turn, so
+            # storing it directly would alias every episode entry
+            snap = Board()
+            snap.copyBoard(prev)
+            state = {
+                'before': snap,
+                'after': snap,
+                'reward': r,
+                'action': maxOP
+            }
+            if len(self.episode) > 0:
+                self.episode[-1]['after'] = snap
+            self.episode.append(state)
+            return maxOP, r
+        else:
+            return -1, -1
+
+    """
+    state = {
+        'before': Board()
+        'after': Board()
+        'reward': int
+        'action': int
+    }
+    """
+
+
+
+if __name__ == "__main__":
+    AI = Agent()
+    EPISODE = 1001
+    for e in range(EPISODE):
+        B = Board()
+        B.initialize()
+        while True:
+            #print("\nAI's turn")
+            act, r = AI.step(B)
+            if B.end_game():
+                #B.printBoard()
+                break
+            #B.printBoard()
+            #print("\nEnv's turn")
+            B.GenRandTile(r)
+            if B.end_game():
+                #B.printBoard()
+                break
+            #B.printBoard()
+        if e % 100 == 0:
+            print("#Episode: {episode}".format(episode = e))
+            B.printBoard()
diff --git a/analyze.py b/analyze.py
new file mode 100644
index 0000000..66ca1fc
--- /dev/null
+++ b/analyze.py
@@ -0,0 +1,27 @@
+from board import Board
+import numpy as np
+
+class Analyzer():
+    def __init__(self):
+        self.nums = {}
+        self.top = 0
+        self.reset()
+
+    def eval(self, b):
+        tiles = b.getBoard()
+        for row in tiles:
+            for c in row:
+                if c != 0:
+                    self.nums[str(c)] += 1
+                    if c > self.top:
+                        self.top = c
+
+    def printAnalysis(self, milestone):
+        # occurrence rate of the 5 highest tile values over the last `milestone` games
+        for i in range(self.top, max(self.top - 5, 0), -1):
+            print("{}: {:.2%}".format((np.int32(1) << np.int32(i)), (self.nums[str(i)] / milestone)))
+
+
+    def reset(self):
+        self.top = 0
+        self.nums = {str(i): 0 for i in range(1, 25)}
diff --git a/board.py b/board.py
index d62daf4..d5f5806 100644
--- a/board.py
+++ b/board.py
@@ -5,21 +5,26 @@ class Board():
 
     def __init__(self):
         self.tile = np.zeros((4, 4), dtype = np.uint32)
+        self.initialize()
 
     def initialize(self):
+        self.tile = np.zeros((4, 4), dtype = np.uint32)
         pos = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
        random.shuffle(pos)
         for i in range(2):
             x = pos[i] // 4
             y = pos[i] % 4
             num = random.randint(1, 101)
-            if num <= 75:
+            if num <= 90:
                 self.tile[x][y] = 1
             else:
                 self.tile[x][y] = 2
 
-    def GenRandTile(self):
+    def GenRandTile(self, prev_reward):
+        if prev_reward == -1:
+            return
+
         x, y = -1, -1
         while True:
             pos = random.randint(0, 15)
@@ -28,14 +33,14 @@ def GenRandTile(self):
                 y = pos % 4
                 break
         num = random.randint(1, 101)
-        if num <= 75:
+        if num <= 90:
             self.tile[x][y] = 1
         else:
             self.tile[x][y] = 2
 
 
     def copyBoard(self, tmp):
-        self.tile = tmp.copy()
+        self.tile = tmp.getBoard().copy()
 
     def move(self, op):
         if op == 0:
@@ -117,6 +122,43 @@ def reverse(self):
         self.reflect_horizontal()
         self.reflect_vertical()
 
+    def morphBoard(self, i):
+        # apply one of the 8 board symmetries (i == 0 keeps the board as is)
+        if i == 1:
+            self.reflect_horizontal()
+        elif i == 2:
+            self.reflect_vertical()
+        elif i == 3:
+            self.reflect_horizontal()
+            self.reflect_vertical()
+        elif i == 4:
+            self.rotate_right()
+        elif i == 5:
+            self.rotate_right()
+            self.reflect_horizontal()
+        elif i == 6:
+            self.rotate_right()
+            self.reflect_vertical()
+        elif i == 7:
+            self.rotate_right()
+            self.reflect_horizontal()
+            self.reflect_vertical()
+
+
+    def end_game(self):
+        # the game is over only when no move can change the board
+        for op in range(4):
+            tmp = Board()
+            tmp.copyBoard(self)
+            if tmp.move(op) != -1:
+                return False
+        return True
+
+    def getBoard(self):
+        return self.tile
+
+    def getTile(self, pos):
+        return self.tile[pos // 4][pos % 4]
 
     def printBoard(self):
         print(self.tile)
@@ -130,20 +172,27 @@
     b = Board()
     b.initialize()
     b.printBoard()
-    print()
-    b.rotate_right()
-    b.printBoard()
-    b.rotate_left()
-    b.printBoard()
-    print()
-    b.GenRandTile()
-    b.move_right()
-    b.printBoard()
-    print()
-    b.GenRandTile()
-    b.move_up()
-    b.printBoard()
-    print()
-    b.GenRandTile()
-    b.move_down()
-    b.printBoard()
+    r = 0
+    while True:
+        key = input()
+        if key == 'w':
+            op = 0
+        elif key == 'd':
+            op = 1
+        elif key == 's':
+            op = 2
+        elif key == 'a':
+            op = 3
+        else:
+            continue
+        reward = b.move(op)
+        if reward != -1:
+            r += reward
+        if b.end_game():
+            break
+        b.GenRandTile(reward)
+        b.printBoard()
+        print("Score: {}\n".format(r))
+        if b.end_game():
+            break
+