class Algo: # A class for defining algorithms used (minimax and alpha-beta pruning) def miniMax(State, Ply_num): # Function for the minimax algorithm for i in range(State.Current.dimY): for j in range(State.Current.dimX): if State.Current.Mat[i][j] == ' ' and (j, i) not in State.children: State.Make(j, i, True) if Ply_num < 2: return (i, j) Minimum_Score = 1000 i = 0 j = 0 for k, z in State.children.items(): Result = Algo.Maximum(z, Ply_num - 1, Minimum_Score) if Minimum_Score > Result: Minimum_Score = Result i = k[0] j = k[1] return (i, j) def Maximum(State, Ply_num, Alpha): # Alpha-beta pruning function for taking care of Alpha values if Ply_num == 0: return State.CurrentScore for i in range(State.Current.dimY): for j in range(State.Current.dimX): if State.Current.Mat[i][j] == ' ' and (j, i) not in State.children: State.Make(j, i, False) Maximum_Score = -1000 i = 0 j = 0 for k, z in State.children.items(): Result = Algo.Minimum(z, Ply_num - 1, Maximum_Score) if Maximum_Score < Result: Maximum_Score = Result if Result > Alpha: return Result return Maximum_Score def Minimum(State, Ply_num, Beta): # Alpha-beta pruning function for taking care of Beta values if Ply_num == 0: return State.CurrentScore for i in range(State.Current.dimY): for j in range(State.Current.dimX): if State.Current.Mat[i][j] == ' ' and (j, i) not in State.children: State.Make(j, i, True) Minimum_Score = 1000 i = 0 j = 0 for k, z in State.children.items(): Result = Algo.Maximum(z, Ply_num - 1, Minimum_Score) if Minimum_Score > Result: Minimum_Score = Result if Result < Beta: return Result return Minimum_Score class MCTS: # main function for the Monte Carlo Tree Search def monte_carlo_tree_search(root): while resources_left(time, computational power): leaf = traverse(root) simulation_result = rollout(leaf) backpropagate(leaf, simulation_result) return best_child(root) # function for node traversal def traverse(node): while fully_expanded(node): node = best_uct(node) # in case no children are present / node is terminal return pick_univisted(node.children) or node # function for the result of the simulation def rollout(node): while non_terminal(node): node = rollout_policy(node) return result(node) # function for randomly selecting a child node def rollout_policy(node): return pick_random(node.children) # function for backpropagation def backpropagate(node, result): if is_root(node) return node.stats = update_stats(node, result) backpropagate(node.parent) # function for selecting the best child # node with highest number of visits def best_child(node): #pick child with highest number of visits