diff options
-rw-r--r-- | GameState.py | 144 | ||||
-rw-r--r-- | MCTS.py | 150 |
2 files changed, 0 insertions, 294 deletions
diff --git a/GameState.py b/GameState.py deleted file mode 100644 index eed8f36..0000000 --- a/GameState.py +++ /dev/null @@ -1,144 +0,0 @@ -from random import choice - -# Based on https://github.com/DieterBuys/mcts-player/ - -class GameState(object): - - def __init__(self): - self.next_turn_player = 1 - self.player = None - - @property - def game_result(self): - return None - - def get_moves(self): - return set() - - def get_random_move(self): - moves = self.get_moves() - return choice(tuple(moves)) if moves != set() else None - - def play_move(self, move): - pass - - -class DotsAndBoxesState(GameState): - def __init__(self, nb_rows, nb_cols, player): - super(DotsAndBoxesState, self).__init__() - - self.nb_rows = nb_rows - self.nb_cols = nb_cols - rows = [] - for ri in range(nb_rows + 1): - columns = [] - for ci in range(nb_cols + 1): - columns.append({"v": 0, "h": 0}) - rows.append(columns) - self.board = rows - - self.score = {1: 0, 2: 0} - self.player = player - print("Player: ", player) - - @property - def game_result(self): - def game_decided(nb_cols, nb_rows, scoreP, scoreO): - # the game is decided if the winner is already known even before the game is ended - # you're guaranteed to win the game if you have more than halve of the total points that can be earned - total_points = nb_rows * nb_cols - if scoreP > total_points // 2 or scoreO > total_points // 2: - return True - else: - return False - - # check if the board is full, then decide based on score - free_lines = self.get_moves() - player = self.player - opponent = self.player % 2 + 1 - - if not game_decided(self.nb_cols, self.nb_rows, self.score[player], self.score[opponent]) and len(free_lines) > 0: - return None - elif self.score[player] > self.score[opponent]: - return 1 - elif self.score[player] < self.score[opponent]: - return 0 - else: - return 0.5 - - def get_moves(self): - free_lines = [] - for ri in range(len(self.board)): - row = self.board[ri] - for ci in range(len(row)): - cell = row[ci] - if ri < (len(self.board) - 1) and cell["v"] == 0: - free_lines.append((ri, ci, "v")) - if ci < (len(row) - 1) and cell["h"] == 0: - free_lines.append((ri, ci, "h")) - return set(free_lines) - - def play_move(self, move): - r, c, o = move - assert move in self.get_moves() - - # check if this move makes a box - makes_box = False - if o == "h": - if r - 1 >= 0: - # check above - if self.board[r-1][c]["h"] != 0 and self.board[r-1][c]["v"] != 0 and self.board[r-1][c+1]["v"] != 0: - makes_box = True - self.score[self.next_turn_player] += 1 - if r + 1 <= self.nb_rows: - # check below - if self.board[r+1][c]["h"] != 0 and self.board[r][c]["v"] != 0 and self.board[r][c+1]["v"] != 0: - makes_box = True - self.score[self.next_turn_player] += 1 - - elif o == "v": - if c - 1 >= 0: - # check left - if self.board[r][c-1]["v"] != 0 and self.board[r][c-1]["h"] != 0 and self.board[r+1][c-1]["h"] != 0: - makes_box = True - self.score[self.next_turn_player] += 1 - - if c + 1 <= self.nb_cols: - # check right - if self.board[r][c+1]["v"] != 0 and self.board[r][c]["h"] != 0 and self.board[r+1][c]["h"] != 0: - makes_box = True - self.score[self.next_turn_player] += 1 - - - # register move - self.board[r][c][o] = self.next_turn_player - - if not makes_box: - # switch turns - self.next_turn_player = self.next_turn_player % 2 + 1 - - def __repr__(self): - str = "" - for r in range(self.nb_rows + 1): - for o in ["h", "v"]: - for c in range(self.nb_cols + 1): - if o == "h": - str += "." - if c != self.nb_cols: - if self.board[r][c][o] == 0: - str += " " - else: - str += "__" - else: - str += "\n" - elif o == "v": - if r != self.nb_rows: - if self.board[r][c][o] == 0: - str += " " - else: - str += "|" - if c != self.nb_cols: - str += " " - else: - str += "\n" - return str diff --git a/MCTS.py b/MCTS.py deleted file mode 100644 index 7e81ac6..0000000 --- a/MCTS.py +++ /dev/null @@ -1,150 +0,0 @@ -import math -from copy import deepcopy -from time import clock -from random import choice - -from GameState import GameState - -# Based on https://github.com/DieterBuys/mcts-player/ - -class GameController(object): - def get_next_move(self, state): - # when you get a new move, it is assumed that the game is not ended yet - assert state.get_moves() - -class MCTSNode(object): - """Monte Carlo Tree Node. - Each node encapsulates a particular game state, the moves that - are possible from that state and the strategic information accumulated - by the tree search as it progressively samples the game space. - """ - - def __init__(self, state, parent=None, move=None): - self.parent = parent - self.move = move - self.state = state - - self.plays = 0 - self.score = 0 - - self.pending_moves = state.get_moves() - self.children = [] - - def select_child_ucb(self): - # Note that each node's plays count is equal - # to the sum of its children's plays - def ucb(child): - win_ratio = child.score / child.plays \ - + math.sqrt(2 * math.log(self.plays) / child.plays) - return win_ratio - - return max(self.children, key=ucb) - - def expand_move(self, move): - self.pending_moves.remove(move) # raises KeyError - - child_state = deepcopy(self.state) - child_state.play_move(move) - - child = MCTSNode(state=child_state, parent=self, move=move) - self.children.append(child) - return child - - def get_score(self, result): - # return result - if result == 0.5: - return result - - if self.state.player == 2: - if self.state.next_turn_player == result: - return 0.0 - else: - return 1.0 - else: - if self.state.next_turn_player == result: - return 1.0 - else: - return 0.0 - - if self.state.next_turn_player == result: - return 0.0 - else: - return 1.0 - - def __repr__(self): - s = 'ROOT\n' if self.parent is None else '' - - children_moves = [c.move for c in self.children] - - s += """Score ratio: {score} / {plays} -Pending moves: {pending_moves} -Children's moves: {children_moves} -State: -{state}\n""".format(children_moves=children_moves, **self.__dict__) - - return s - - -class MCTSGameController(GameController): - """Game controller that uses MCTS to determine the next move. - This is the class which implements the Monte Carlo Tree Search algorithm. - It builds a game tree of MCTSNodes and samples the game space until a set - time has elapsed. - """ - - def select(self): - node = self.root_node - - # Descend until we find a node that has pending moves, or is terminal - while node.pending_moves == set() and node.children != []: - node = node.select_child_ucb() - - return node - - def expand(self, node): - assert node.pending_moves != set() - - move = choice(tuple(node.pending_moves)) - return node.expand_move(move) - - def simulate(self, state, max_iterations=1000): - state = deepcopy(state) - - move = state.get_random_move() - while move is not None: - state.play_move(move) - move = state.get_random_move() - - max_iterations -= 1 - if max_iterations <= 0: - return 0.5 # raise exception? (game too deep to simulate) - - return state.game_result - - def update(self, node, result): - while node is not None: - node.plays += 1 - node.score += node.get_score(result) - node = node.parent - - def get_next_move(self, state, time_allowed=1.0): - super(MCTSGameController, self).get_next_move(state) - - # Create new tree (TODO: Preserve some state for better performance?) - self.root_node = MCTSNode(state) - iterations = 0 - - start_time = clock() - while clock() < start_time + time_allowed: - node = self.select() - - if node.pending_moves != set(): - node = self.expand(node) - - result = self.simulate(node.state) - self.update(node, result) - - iterations += 1 - - # Return most visited node's move - return max(self.root_node.children, key=lambda n: n.plays).move |