aboutsummaryrefslogtreecommitdiffstats
path: root/MCTS.py
diff options
context:
space:
mode:
authorMatt Strapp <strap012@umn.edu>2021-04-26 13:07:59 -0500
committerMatt Strapp <strap012@umn.edu>2021-04-26 13:07:59 -0500
commitec32b13540d248fde8f4c6ef4c0eaeda1889bed8 (patch)
treea8655ac81215b7c82bd74a608780b2f3a80e4cfb /MCTS.py
parentMerge main and some new stuffs (diff)
downloadcsci4511w-ec32b13540d248fde8f4c6ef4c0eaeda1889bed8.tar
csci4511w-ec32b13540d248fde8f4c6ef4c0eaeda1889bed8.tar.gz
csci4511w-ec32b13540d248fde8f4c6ef4c0eaeda1889bed8.tar.bz2
csci4511w-ec32b13540d248fde8f4c6ef4c0eaeda1889bed8.tar.lz
csci4511w-ec32b13540d248fde8f4c6ef4c0eaeda1889bed8.tar.xz
csci4511w-ec32b13540d248fde8f4c6ef4c0eaeda1889bed8.tar.zst
csci4511w-ec32b13540d248fde8f4c6ef4c0eaeda1889bed8.zip
actually add files
Diffstat (limited to 'MCTS.py')
-rw-r--r--MCTS.py150
1 files changed, 150 insertions, 0 deletions
diff --git a/MCTS.py b/MCTS.py
new file mode 100644
index 0000000..7e81ac6
--- /dev/null
+++ b/MCTS.py
@@ -0,0 +1,150 @@
+import math
+from copy import deepcopy
+from time import clock
+from random import choice
+
+from GameState import GameState
+
+# Based on https://github.com/DieterBuys/mcts-player/
+
+class GameController(object):
+ def get_next_move(self, state):
+ # when you get a new move, it is assumed that the game is not ended yet
+ assert state.get_moves()
+
+class MCTSNode(object):
+ """Monte Carlo Tree Node.
+ Each node encapsulates a particular game state, the moves that
+ are possible from that state and the strategic information accumulated
+ by the tree search as it progressively samples the game space.
+ """
+
+ def __init__(self, state, parent=None, move=None):
+ self.parent = parent
+ self.move = move
+ self.state = state
+
+ self.plays = 0
+ self.score = 0
+
+ self.pending_moves = state.get_moves()
+ self.children = []
+
+ def select_child_ucb(self):
+ # Note that each node's plays count is equal
+ # to the sum of its children's plays
+ def ucb(child):
+ win_ratio = child.score / child.plays \
+ + math.sqrt(2 * math.log(self.plays) / child.plays)
+ return win_ratio
+
+ return max(self.children, key=ucb)
+
+ def expand_move(self, move):
+ self.pending_moves.remove(move) # raises KeyError
+
+ child_state = deepcopy(self.state)
+ child_state.play_move(move)
+
+ child = MCTSNode(state=child_state, parent=self, move=move)
+ self.children.append(child)
+ return child
+
+ def get_score(self, result):
+ # return result
+ if result == 0.5:
+ return result
+
+ if self.state.player == 2:
+ if self.state.next_turn_player == result:
+ return 0.0
+ else:
+ return 1.0
+ else:
+ if self.state.next_turn_player == result:
+ return 1.0
+ else:
+ return 0.0
+
+ if self.state.next_turn_player == result:
+ return 0.0
+ else:
+ return 1.0
+
+ def __repr__(self):
+ s = 'ROOT\n' if self.parent is None else ''
+
+ children_moves = [c.move for c in self.children]
+
+ s += """Score ratio: {score} / {plays}
+Pending moves: {pending_moves}
+Children's moves: {children_moves}
+State:
+{state}\n""".format(children_moves=children_moves, **self.__dict__)
+
+ return s
+
+
+class MCTSGameController(GameController):
+ """Game controller that uses MCTS to determine the next move.
+ This is the class which implements the Monte Carlo Tree Search algorithm.
+ It builds a game tree of MCTSNodes and samples the game space until a set
+ time has elapsed.
+ """
+
+ def select(self):
+ node = self.root_node
+
+ # Descend until we find a node that has pending moves, or is terminal
+ while node.pending_moves == set() and node.children != []:
+ node = node.select_child_ucb()
+
+ return node
+
+ def expand(self, node):
+ assert node.pending_moves != set()
+
+ move = choice(tuple(node.pending_moves))
+ return node.expand_move(move)
+
+ def simulate(self, state, max_iterations=1000):
+ state = deepcopy(state)
+
+ move = state.get_random_move()
+ while move is not None:
+ state.play_move(move)
+ move = state.get_random_move()
+
+ max_iterations -= 1
+ if max_iterations <= 0:
+ return 0.5 # raise exception? (game too deep to simulate)
+
+ return state.game_result
+
+ def update(self, node, result):
+ while node is not None:
+ node.plays += 1
+ node.score += node.get_score(result)
+ node = node.parent
+
+ def get_next_move(self, state, time_allowed=1.0):
+ super(MCTSGameController, self).get_next_move(state)
+
+ # Create new tree (TODO: Preserve some state for better performance?)
+ self.root_node = MCTSNode(state)
+ iterations = 0
+
+ start_time = clock()
+ while clock() < start_time + time_allowed:
+ node = self.select()
+
+ if node.pending_moves != set():
+ node = self.expand(node)
+
+ result = self.simulate(node.state)
+ self.update(node, result)
+
+ iterations += 1
+
+ # Return most visited node's move
+ return max(self.root_node.children, key=lambda n: n.plays).move