diff options
Diffstat (limited to '')
| -rw-r--r-- | GameState.py | 144 | ||||
| -rw-r--r-- | MCTS.py | 150 | 
2 files changed, 0 insertions, 294 deletions
diff --git a/GameState.py b/GameState.py deleted file mode 100644 index eed8f36..0000000 --- a/GameState.py +++ /dev/null @@ -1,144 +0,0 @@ -from random import choice - -# Based on https://github.com/DieterBuys/mcts-player/ - -class GameState(object): - -    def __init__(self): -        self.next_turn_player = 1 -        self.player = None - -    @property -    def game_result(self): -        return None - -    def get_moves(self): -        return set() - -    def get_random_move(self): -        moves = self.get_moves() -        return choice(tuple(moves)) if moves != set() else None - -    def play_move(self, move): -        pass - - -class DotsAndBoxesState(GameState): -    def __init__(self, nb_rows, nb_cols, player): -        super(DotsAndBoxesState, self).__init__() - -        self.nb_rows = nb_rows -        self.nb_cols = nb_cols -        rows = [] -        for ri in range(nb_rows + 1): -            columns = [] -            for ci in range(nb_cols + 1): -                columns.append({"v": 0, "h": 0}) -            rows.append(columns) -        self.board = rows - -        self.score = {1: 0, 2: 0} -        self.player = player -        print("Player: ", player) - -    @property -    def game_result(self): -        def game_decided(nb_cols, nb_rows, scoreP, scoreO): -            # the game is decided if the winner is already known even before the game is ended -            # you're guaranteed to win the game if you have more than halve of the total points that can be earned -            total_points = nb_rows * nb_cols -            if scoreP > total_points // 2 or scoreO > total_points // 2: -                return True -            else: -                return False - -        # check if the board is full, then decide based on score -        free_lines = self.get_moves() -        player = self.player -        opponent = self.player % 2 + 1 - -        if not game_decided(self.nb_cols, self.nb_rows, self.score[player], self.score[opponent]) and len(free_lines) > 0: -            return None -        elif self.score[player] > self.score[opponent]: -            return 1 -        elif self.score[player] < self.score[opponent]: -            return 0 -        else: -            return 0.5 - -    def get_moves(self): -        free_lines = [] -        for ri in range(len(self.board)): -            row = self.board[ri] -            for ci in range(len(row)): -                cell = row[ci] -                if ri < (len(self.board) - 1) and cell["v"] == 0: -                    free_lines.append((ri, ci, "v")) -                if ci < (len(row) - 1) and cell["h"] == 0: -                    free_lines.append((ri, ci, "h")) -        return set(free_lines) - -    def play_move(self, move): -        r, c, o = move -        assert move in self.get_moves() - -        # check if this move makes a box -        makes_box = False -        if o == "h": -            if r - 1 >= 0: -                # check above -                if self.board[r-1][c]["h"] != 0 and self.board[r-1][c]["v"] != 0 and self.board[r-1][c+1]["v"] != 0: -                    makes_box = True -                    self.score[self.next_turn_player] += 1 -            if r + 1 <= self.nb_rows: -                # check below -                if self.board[r+1][c]["h"] != 0 and self.board[r][c]["v"] != 0 and self.board[r][c+1]["v"] != 0: -                    makes_box = True -                    self.score[self.next_turn_player] += 1 - -        elif o == "v": -            if c - 1 >= 0: -                # check left -                if self.board[r][c-1]["v"] != 0 and self.board[r][c-1]["h"] != 0 and self.board[r+1][c-1]["h"] != 0: -                    makes_box = True -                    self.score[self.next_turn_player] += 1 - -            if c + 1 <= self.nb_cols: -                # check right -                if self.board[r][c+1]["v"] != 0 and self.board[r][c]["h"] != 0 and self.board[r+1][c]["h"] != 0: -                    makes_box = True -                    self.score[self.next_turn_player] += 1 - - -        # register move -        self.board[r][c][o] = self.next_turn_player - -        if not makes_box: -            # switch turns -            self.next_turn_player = self.next_turn_player % 2 + 1 - -    def __repr__(self): -        str = "" -        for r in range(self.nb_rows + 1): -            for o in ["h", "v"]: -                for c in range(self.nb_cols + 1): -                    if o == "h": -                        str += "." -                        if c != self.nb_cols: -                            if self.board[r][c][o] == 0: -                                str += "  " -                            else: -                                str += "__" -                        else: -                            str += "\n" -                    elif o == "v": -                        if r != self.nb_rows: -                            if self.board[r][c][o] == 0: -                                str += " " -                            else: -                                str += "|" -                        if c != self.nb_cols: -                            str += "  " -                        else: -                            str += "\n" -        return str diff --git a/MCTS.py b/MCTS.py deleted file mode 100644 index 7e81ac6..0000000 --- a/MCTS.py +++ /dev/null @@ -1,150 +0,0 @@ -import math -from copy import deepcopy -from time import clock -from random import choice - -from GameState import GameState - -# Based on https://github.com/DieterBuys/mcts-player/ - -class GameController(object): -    def get_next_move(self, state): -        # when you get a new move, it is assumed that the game is not ended yet -        assert state.get_moves() - -class MCTSNode(object): -    """Monte Carlo Tree Node. -    Each node encapsulates a particular game state, the moves that -    are possible from that state and the strategic information accumulated -    by the tree search as it progressively samples the game space. -    """ - -    def __init__(self, state, parent=None, move=None): -        self.parent = parent -        self.move = move -        self.state = state - -        self.plays = 0 -        self.score = 0 - -        self.pending_moves = state.get_moves() -        self.children = [] - -    def select_child_ucb(self): -        # Note that each node's plays count is equal -        # to the sum of its children's plays -        def ucb(child): -            win_ratio = child.score / child.plays \ -                        + math.sqrt(2 * math.log(self.plays) / child.plays) -            return win_ratio - -        return max(self.children, key=ucb) - -    def expand_move(self, move): -        self.pending_moves.remove(move)  # raises KeyError - -        child_state = deepcopy(self.state) -        child_state.play_move(move) - -        child = MCTSNode(state=child_state, parent=self, move=move) -        self.children.append(child) -        return child - -    def get_score(self, result): -        # return result -        if result == 0.5: -            return result - -        if self.state.player == 2: -            if self.state.next_turn_player == result: -                return 0.0 -            else: -                return 1.0 -        else: -            if self.state.next_turn_player == result: -                return 1.0 -            else: -                return 0.0 - -        if self.state.next_turn_player == result: -            return 0.0 -        else: -            return 1.0 - -    def __repr__(self): -        s = 'ROOT\n' if self.parent is None else '' - -        children_moves = [c.move for c in self.children] - -        s += """Score ratio: {score} / {plays} -Pending moves: {pending_moves} -Children's moves: {children_moves} -State: -{state}\n""".format(children_moves=children_moves, **self.__dict__) - -        return s - - -class MCTSGameController(GameController): -    """Game controller that uses MCTS to determine the next move. -    This is the class which implements the Monte Carlo Tree Search algorithm. -    It builds a game tree of MCTSNodes and samples the game space until a set -    time has elapsed. -    """ - -    def select(self): -        node = self.root_node - -        # Descend until we find a node that has pending moves, or is terminal -        while node.pending_moves == set() and node.children != []: -            node = node.select_child_ucb() - -        return node - -    def expand(self, node): -        assert node.pending_moves != set() - -        move = choice(tuple(node.pending_moves)) -        return node.expand_move(move) - -    def simulate(self, state, max_iterations=1000): -        state = deepcopy(state) - -        move = state.get_random_move() -        while move is not None: -            state.play_move(move) -            move = state.get_random_move() - -            max_iterations -= 1 -            if max_iterations <= 0: -                return 0.5  # raise exception? (game too deep to simulate) - -        return state.game_result - -    def update(self, node, result): -        while node is not None: -            node.plays += 1 -            node.score += node.get_score(result) -            node = node.parent - -    def get_next_move(self, state, time_allowed=1.0): -        super(MCTSGameController, self).get_next_move(state) - -        # Create new tree (TODO: Preserve some state for better performance?) -        self.root_node = MCTSNode(state) -        iterations = 0 - -        start_time = clock() -        while clock() < start_time + time_allowed: -            node = self.select() - -            if node.pending_moves != set(): -                node = self.expand(node) - -            result = self.simulate(node.state) -            self.update(node, result) - -            iterations += 1 - -        # Return most visited node's move -        return max(self.root_node.children, key=lambda n: n.plays).move  | 
