1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
|
import math
from copy import deepcopy
from time import perf_counter
from random import choice
from GameState import GameState
# Based on https://github.com/DieterBuys/mcts-player/
class GameController(object):
    """Abstract base class for game-playing controllers.

    Concrete controllers override ``get_next_move`` to choose a move for
    the given state; this base implementation only sanity-checks that
    the position is still playable.
    """

    def get_next_move(self, state):
        """Check that *state* still offers at least one legal move.

        A controller is only ever consulted on a non-terminal position,
        so an empty move collection here signals a caller bug.
        """
        # When asked for a new move, the game is assumed not to be over yet.
        assert state.get_moves()
class MCTSNode(object):
    """Monte Carlo Tree Node.

    Each node encapsulates a particular game state, the moves that
    are possible from that state and the strategic information accumulated
    by the tree search as it progressively samples the game space.
    """

    def __init__(self, state, parent=None, move=None):
        """Create a node for *state*.

        :param state: game state this node represents.
        :param parent: parent ``MCTSNode`` (``None`` for the root).
        :param move: the move that led from *parent* to this state.
        """
        self.parent = parent
        self.move = move
        self.state = state
        self.plays = 0    # number of simulations that passed through this node
        self.score = 0    # accumulated score from those simulations
        # Moves not yet expanded into child nodes; presumably a set
        # (``expand_move`` expects set-style ``remove``) — TODO confirm
        # against GameState.get_moves().
        self.pending_moves = state.get_moves()
        self.children = []

    def select_child_ucb(self):
        """Return the child maximising the UCB1 selection formula."""
        # Note that each node's plays count is equal
        # to the sum of its children's plays.
        def ucb(child):
            # Exploitation term (average score) plus exploration term.
            return (child.score / child.plays
                    + math.sqrt(2 * math.log(self.plays) / child.plays))
        return max(self.children, key=ucb)

    def expand_move(self, move):
        """Expand *move* into a new child node and return that child."""
        self.pending_moves.remove(move)  # raises KeyError if move not pending
        child_state = deepcopy(self.state)
        child_state.play_move(move)
        child = MCTSNode(state=child_state, parent=self, move=move)
        self.children.append(child)
        return child

    def get_score(self, result):
        """Convert a game *result* (winning player, or 0.5 for a draw)
        into a score in [0, 1] from this node's perspective.

        Fix: the original had an unreachable trailing ``if``/``else``
        after both branches of the preceding conditional returned, plus
        a commented-out ``return result``; both removed.
        """
        if result == 0.5:  # draw
            return result
        winner_moved_here = (self.state.next_turn_player == result)
        # Player 2's perspective is the inverse of player 1's.
        if self.state.player == 2:
            return 0.0 if winner_moved_here else 1.0
        return 1.0 if winner_moved_here else 0.0

    def __repr__(self):
        s = 'ROOT\n' if self.parent is None else ''
        children_moves = [c.move for c in self.children]
        s += """Score ratio: {score} / {plays}
Pending moves: {pending_moves}
Children's moves: {children_moves}
State:
{state}\n""".format(children_moves=children_moves, **self.__dict__)
        return s
class MCTSGameController(GameController):
    """Game controller that uses MCTS to determine the next move.

    This is the class which implements the Monte Carlo Tree Search
    algorithm. It builds a game tree of MCTSNodes and samples the game
    space until a set time has elapsed.
    """

    def select(self):
        """Descend from the root until reaching a node that still has
        pending (unexpanded) moves, or a terminal node.
        """
        node = self.root_node
        # Fully-expanded interior nodes are traversed via UCB1.
        # Fix: truthiness checks instead of `== set()` comparisons, which
        # would misbehave if get_moves() ever returned an empty list.
        while not node.pending_moves and node.children:
            node = node.select_child_ucb()
        return node

    def expand(self, node):
        """Expand one randomly chosen pending move and return the child."""
        assert node.pending_moves
        move = choice(tuple(node.pending_moves))
        return node.expand_move(move)

    def simulate(self, state, max_iterations=1000):
        """Play random moves from a copy of *state* until the game ends;
        return the game result, or 0.5 (draw) if the rollout exceeds
        *max_iterations* moves (game too deep to simulate).
        """
        state = deepcopy(state)

        move = state.get_random_move()
        while move is not None:
            state.play_move(move)
            move = state.get_random_move()
            max_iterations -= 1
            if max_iterations <= 0:
                return 0.5  # raise exception? (game too deep to simulate)

        return state.game_result

    def update(self, node, result):
        """Back-propagate *result* from *node* up to the root, updating
        each ancestor's play count and score.
        """
        while node is not None:
            node.plays += 1
            node.score += node.get_score(result)
            node = node.parent

    def get_next_move(self, state, time_allowed=1.0):
        """Run MCTS from *state* for *time_allowed* seconds and return
        the move of the most-visited root child.
        """
        super(MCTSGameController, self).get_next_move(state)

        self.root_node = MCTSNode(state)

        # Hoist the loop-invariant deadline; the unused iteration counter
        # from the original was removed.
        deadline = perf_counter() + time_allowed
        while perf_counter() < deadline:
            node = self.select()
            if node.pending_moves:
                node = self.expand(node)
            result = self.simulate(node.state)
            self.update(node, result)

        # Return most visited node's move
        return max(self.root_node.children, key=lambda n: n.plays).move
|