1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
|
class Algo: # A class for defining algorithms used (minimax and alpha-beta pruning)
def miniMax(State, Ply_num): # Function for the minimax algorithm
for i in range(State.Current.dimY):
for j in range(State.Current.dimX):
if State.Current.Mat[i][j] == ' ' and (j, i) not in State.children:
State.Make(j, i, True)
if Ply_num < 2:
return (i, j)
Minimum_Score = 1000
i = 0
j = 0
for k, z in State.children.items():
Result = Algo.Maximum(z, Ply_num - 1, Minimum_Score)
if Minimum_Score > Result:
Minimum_Score = Result
i = k[0]
j = k[1]
return (i, j)
def Maximum(State, Ply_num, Alpha): # Alpha-beta pruning function for taking care of Alpha values
if Ply_num == 0:
return State.CurrentScore
for i in range(State.Current.dimY):
for j in range(State.Current.dimX):
if State.Current.Mat[i][j] == ' ' and (j, i) not in State.children:
State.Make(j, i, False)
Maximum_Score = -1000
i = 0
j = 0
for k, z in State.children.items():
Result = Algo.Minimum(z, Ply_num - 1, Maximum_Score)
if Maximum_Score < Result:
Maximum_Score = Result
if Result > Alpha:
return Result
return Maximum_Score
def Minimum(State, Ply_num, Beta): # Alpha-beta pruning function for taking care of Beta values
if Ply_num == 0:
return State.CurrentScore
for i in range(State.Current.dimY):
for j in range(State.Current.dimX):
if State.Current.Mat[i][j] == ' ' and (j, i) not in State.children:
State.Make(j, i, True)
Minimum_Score = 1000
i = 0
j = 0
for k, z in State.children.items():
Result = Algo.Maximum(z, Ply_num - 1, Minimum_Score)
if Minimum_Score > Result:
Minimum_Score = Result
if Result < Beta:
return Result
return Minimum_Score
class MCTS:
# main function for the Monte Carlo Tree Search
def monte_carlo_tree_search(root):
while resources_left(time, computational power):
leaf = traverse(root)
simulation_result = rollout(leaf)
backpropagate(leaf, simulation_result)
return best_child(root)
# function for node traversal
def traverse(node):
while fully_expanded(node):
node = best_uct(node)
# in case no children are present / node is terminal
return pick_univisted(node.children) or node
# function for the result of the simulation
def rollout(node):
while non_terminal(node):
node = rollout_policy(node)
return result(node)
# function for randomly selecting a child node
def rollout_policy(node):
return pick_random(node.children)
# function for backpropagation
def backpropagate(node, result):
if is_root(node) return
node.stats = update_stats(node, result)
backpropagate(node.parent)
# function for selecting the best child
# node with highest number of visits
def best_child(node):
#pick child with highest number of visits
|