From cfdb6ce88244a464601fc0f66f7cfc4961ecadcf Mon Sep 17 00:00:00 2001 From: RossTheRoss Date: Sun, 2 May 2021 21:29:36 -0500 Subject: Do project paper --- csci4511w/4511proj.bib | 82 +++++++++++++++++++++++++ csci4511w/4511proj.tex | 163 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 245 insertions(+) create mode 100644 csci4511w/4511proj.bib create mode 100644 csci4511w/4511proj.tex diff --git a/csci4511w/4511proj.bib b/csci4511w/4511proj.bib new file mode 100644 index 0000000..974b1be --- /dev/null +++ b/csci4511w/4511proj.bib @@ -0,0 +1,82 @@ +@book{GeneralGame, + AUTHOR = {Goldwaser, A. and Thielscher, M.}, + PUBLISHER = {University of New South Wales}, + TITLE = {Deep Reinforcement Learning for General Game Playing}, + URL = {https://ojs.aaai.org//index.php/AAAI/article/view/5533}, + YEAR = {April 3 2020} +} +@book{MCTS, + AUTHOR = {Chaslot, G. and Bakkes, S. and Szita, I. and Spronck, P.}, + PUBLISHER = {Universiteit Maastricht / MICC}, + TITLE = {Monte-Carlo Tree Search: A New Framework for Game AI}, + URL = {https://www.aaai.org/Papers/AIIDE/2008/AIIDE08-036.pdf}, + YEAR = {2008} +} +@book{Minimax, + AUTHOR = {Kang, X. and Wang, Y. and Yanrui, H.}, + PUBLISHER = {Dalian Polytechnic University}, + TITLE = {Research on Different Heuristics for Minimax Algorithm Insight from Connect-4 Game}, + URL = {https://www.scirp.org/pdf/JILSA_2019030615385424.pdf}, + YEAR = {March 4 2019} +} +@inproceedings{IEEE2015, + AUTHOR = {Y. {Zhuang} and S. {Li} and T. V. {Peters} and C. {Zhang}}, + BOOKTITLE = {2015 IEEE Conference on Computational Intelligence and Games (CIG)}, + DOI = {10.1109/CIG.2015.7317912}, + NUMBER = {}, + PAGES = {314-321}, + TITLE = {Improving Monte-Carlo tree search for dots-and-boxes with a novel board representation and artificial neural networks}, + VOLUME = {}, + YEAR = {2015} +} +@article{Cornell1998, + author = {Lex Weaver and + Terry Bossomaier}, + title = {Evolution of Neural Networks to Play the Game of Dots-and-Boxes}, + journal = {CoRR}, + volume = {cs.NE/9809111}, + year = {1998}, + url = {https://arxiv.org/abs/cs/9809111}, + timestamp = {Fri, 10 Jan 2020 12:58:53 +0100}, + biburl = {https://dblp.org/rec/journals/corr/cs-NE-9809111.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} +@article{Barker_Korf_2012, + author={Barker, Joseph and Korf, Richard}, + title = {Solving Dots-And-Boxes}, + volume = {26}, + url = {https://ojs.aaai.org/index.php/AAAI/article/view/8144}, + number = {1}, + journal={Proceedings of the AAAI Conference on Artificial Intelligence}, + year={2012}, + month={July} +} +@inproceedings{berlekamp2000, + AUTHOR = {Berlekamp, Elwyn and Scott, Katherine}, + BOOKTITLE = {More Games of No Chance, Proc. MSRI Workshop on Combinatorial Games}, + PAGES = {317--330}, + TITLE = {Forcing your opponent to stay in control of a loony dots-and-boxes endgame}, + YEAR = {2000} +} +@misc{buzzard2014, + ARCHIVEPREFIX = {arXiv}, + AUTHOR = {Kevin Buzzard and Michael Ciere}, + EPRINT = {1305.2156}, + PRIMARYCLASS = {math.CO}, + TITLE = {Playing simple loony dots and boxes endgames optimally}, + YEAR = {2014} +} +@misc{allcock2019best, + ARCHIVEPREFIX = {arXiv}, + AUTHOR = {Daniel Allcock}, + EPRINT = {1811.10747}, + PRIMARYCLASS = {math.CO}, + TITLE = {Best play in Dots and Boxes endgames}, + YEAR = {2019} +} + @misc{wlson_2002, + AUTHOR = {Wlson, David}, + JOURNAL = {Dots-and-Boxes Analysis Index}, + URL = {https://wilson.engr.wisc.edu/boxes/}, + YEAR = {2002} +} \ No newline at end of file diff --git a/csci4511w/4511proj.tex b/csci4511w/4511proj.tex new file mode 100644 index 0000000..8ed0eda --- /dev/null +++ b/csci4511w/4511proj.tex @@ -0,0 +1,163 @@ +\documentclass[12pt]{article} +\usepackage{fullpage}\usepackage{indentfirst}\usepackage{booktabs} +\setcounter{topnumber}{10} %% default 2 +\setcounter{bottomnumber}{10} %% default 1 +\setcounter{totalnumber}{10} %% default 3 +\renewcommand{\topfraction}{1} %% default .7 +\renewcommand{\bottomfraction}{1} %% default 0.3 +\renewcommand{\textfraction}{0} %% default .2 +\renewcommand{\floatpagefraction}{0.8} %% default .5 + +\title{CSCI 4511W Final Project Report: Dots and Boxes} +\author{Jack Vehmeier, Andrea Smith, Matt Strapp} +\date{2021/04/27} + +\begin{document} + \maketitle + \begin{abstract} + La Pipopipette, or Dots and Boxes, is a simple paper and pen game designed by Édouard Lucas in the nineteenth century. The game is similar to tic tac toe in that it is quick, easy to learn, and popular amongst children or bored people with access to paper and writing utensils. We are all familiar with this game from our own childhoods and were curious if we could design an AI opponent that can compete and potentially outperform humans. To find which AI was best we designed various algorithms (Minimax, Monte Carlo, modified Minimax with an iterative deepening element) and compared them against each other to see which would be best to compete with a human. + \end{abstract} + + \section{Description} + Dots and Boxes is played on an empty grid of dots. Players take turns placing horizontal or vertical lines between dots. A player is able to score by placing the last line to “close” a box. Early game dots and boxes consists of moves that are essentially random, with both players trying to avoid placing the third line in a box. Often this produces chains, a set of adjacent boxes where any placed line will result in the opponent scoring all of the boxes in said chain. More advanced strategies like the double-cross strategy result in the players fighting for control, trying to force the other to create the first long chain, which will allow the second player to control the latter half of the game and usually win. + + This creates an interesting programming problem, because while the rules are relatively simple, the state space is quite large. Early game dots and boxes is especially complex, so much so that early moves are almost arbitrary. Therefore, a successful agent will have to efficiently manage memory while examining many possible moves from a given state space in order to choose the best one. As explored in the research done by Joseph Barker and Richard Korf, an \(m * n\) sized board has \(p = m(n+1) + n(m+1)\) edge spaces where a line could be placed, and \(2^p\) possible states. Given this information, simple methods like depth first search would explore every possible move, but produce \(p!\) states-- i.e. A 4\(\times\)4 board has 40 edges and therefore a search space of \(40!\). The problem clearly requires a more efficient algorithm. + + Time and memory efficiency, however, is not our end goal. Our primary objective is to explore what algorithms have the highest win rate. We are simply limited by the computing power of our own machines and the time constraints of this project’s completion date. Therefore, we selected several algorithms and pitted them against each other to see which has the highest win rate. + + + \section{Background} + + \subsection{Search Algorithms} + Implementing AI into a non-trivial game such as dots and boxes requires knowledge on how AI can be implemented into a variety of different games. Once we can see how algorithms are adjusted to efficiently play different types of games we can accurately and efficiently create an algorithm to always win dots and boxes. + + A general algorithm called AlphaZero creates a general game playing agent which can use probability distribution along with Monte Carlo Tree search simulations to play a variety of games at a high level.~\cite{GeneralGame} In Goldwaser and Thielscher's paper they discuss how Monte Carlo Search can have memory problems if games are large enough. If we can implement a way to vary the board size, memory will become an issue if the board becomes large. A way to fix a memory issue is to have an implementation of a copy of the tree that only tracks the state changes of the game in question. This gives an extra \(O(logn)\) memory per state change by having access to the copy of the tree.~\cite{GeneralGame} Looking at fixes to memory issues using simple methods will be important to represent in our algorithms for creating the unbeatable AI for dots and boxes. Monte Carlo Tree search is able to be applied to a variety of games as seen before in AlphaZero. Before implementing MCTS it is important to understand its strengths and drawbacks and identify if it can be applied to our game at all. MCTS is best when it is playing games that have a finite length and when players make a single move per turn. It uses the steps of Selection of Start node, Expansion upon node, Simulation of outcome due to further expansions, and finally Back-tracking to navigate a game tree in order to give the AI the best possible path to win.~\cite{MCTS} + + While it is stated that MCTS can be effectively implemented for classic board games in the Universiteit Maastricht's paper, I see a possible issue in the fact that if a box is completed in dots and boxes the player that completed the box goes again. Monte Carlo search should be able to be modified to accommodate for this, but another algorithm such as Minimax with AlphaBeta pruning should be explored as well to see if dots and boxes can be implemented more effectively. Minimax is an algorithm which uses two agents (Min and Max) to navigate a tree and evaluate current states of the game in order to find an optimal move.~\cite{Minimax} Dalian Polytechnic University discusses that Minimax is perfect for a game like connect 4 due to the fact that some nodes inherently have no impact on the final outcome of the game due to the nature of the game itself. These nodes can be pruned using AlphaBeta. + + \subsection{Already Existing Methods} + + A question that arose during brainstorming is how AI would pick which lines to fill in the early stage of dots and boxes because it seems like there is no move that is better than the other. It is shown that a move like this may seem random, but if a heuristic is created in a way it can simulate which lines are filled in to win a majority of games and prioritize these lines to be filled in during the early stages of the dots and boxes game.~\cite{Minimax} + + In existing scholarly research, it appears most computer scientists approach for Dots and Boxes in two ways: creating an agent to solve a pre-existing problem (i.e.\ predicting the outcome of a partially filled out board) or creating an agent that evaluates moves at each state of the game. Because our project is focused on agents of the latter type, we will discuss agents that make move recommendations at each stage. + + There already exists a number of strong Dots and Boxes agents. One such agent was developed by David Wilson in 2010~\cite{Barker_Korf_2012}.The solver, for each stage, starts at the end of the game with all lines filled in. It then proceeds to the next stage with one less line filled and computes a score for the possible move. Moves are given a positive score if one or two boxes were formed, and a negative score if no boxes are formed. It then moves on to the stage with two less lines filled in, repeating until it reaches the original position. Once it returns to the original position of that stage, the solver has computed all possible moves and keeps the best score. Wilson has used this program to solve previously unsolved problems from the well-known \emph{The Dots-and-Boxes Game} by Elwyn Berlekamp. This solution generates \(2^p\) states per stage, and while that is quite fast, it is not nearly as fast as the \(p!\) that would be generated by a more naive solution such as depth-first search. The greatest pitfall of Wilson’s solver is its memory and time requirement, especially for non-square boards. For example, a 4\(\times\)5 board problem would need 8 terabytes of disk space, 9 gigabytes of RAM, and would need to run for 130 days before being solved~\cite{wlson_2002}. + + Therefore, while Wilson’s solver is an excellent implementation to solve difficult pre-existing problems, there exist faster and less memory-intensive Dots and Boxes agents. In fact, the agent created by Joseph Barker and Richard Korf in 2012 was efficient enough to solve the 4\(\times\)5 board problem~\cite{Barker_Korf_2012}. The agent uses Alpha-Beta search as its main solving method, and takes advantage of several other strategies to minimize unnecessary repetitions and amount of nodes explored to find an optimal solution. The first of these strategies is for the solver to consider chaining-- where several boxes are connected into a ``tube'' with one or both ends open. It uses a preprocessing step when it encounters a chain to capture all provably optimal boxes. If the resulting next move is a hard-hearted handout (that is, a set of two boxes capturable by a single line), it will either capture the handout and be forced to make the next move or leave it for the opponent to capture. In doing this, the solver essentially only considers one state but with a different player, eliminating the need to produce another state in the problem space. The agent also employs a transposition table that stores explored states in a table with its associated minimax value. If a repeated state is encountered, its value is retrieved from memory instead of re-determining the minimax value. This further optimizes the solution by cutting down time spent processing duplicate states. The agent also considers that certain moves that could be made on symmetrical board spaces result in the same minimax values, and therefore can choose a move without considering both spaces. Lastly, the agent is given a move-ordering heuristic as it uses Alpha-Beta search to traverse children of a node in order to make better moves earlier on to narrow the endgame search bounds. By using these strategies, the agent solved the previously unsolved 4\(\times\)5 board in 10 days. It determined that, given optimal play on both sides, the 4\(\times\)5 game will always result in a tie. + + While literature concerning Dots and Boxes is relatively scarce compared to other computational science studies, some research has been done on improving existing techniques for solving the game. Neural networking, while slightly outside of the scope of our project, is an important furtherment of game playing research. One preliminary attempt at applying neural networks to Dots and Boxes was an agent created in 1998 by Lex Weaver and Terry Bossomaier. They assert that some game-playing agents, despite being grandmaster rank, sometimes mishandle problems that are easily solved by human novices~\cite{Cornell1998}. These issues could be solved by giving the agent feedback of which games it wins and loses. Therefore, they used a genetic algorithm to weight their network and pitted it against heuristic-using players as a benchmark. While this network successfully improved while playing against lower level agents, it struggled to learn strategies beyond randomly creating boxes. In order to advance to higher level strategies, such as avoiding filling out the third side of boxes (which gives the opponent a free box), the program’s architecture required a more sparse neural network. + + More recent research has been more successful, such as a Monte-Carlo Tree Search (MCTS) based agent created in 2015 called QDab, QDab improves upon MCTS by using a neural network to select a move instead of a random move. The neural network predicts the winning probability and each stage, and its training data was randomly generated board states with minimax search performed on it to find the resulting score if both opponents play optimally. These random boards were generated to be mid-game, as early game winning probability for Dots and Boxes is extremely complex to compute. The agent also uses other strategies that have not necessarily to be proven to be correct, such as stopping the simulation at a board state whose winning strategy can easily be found with minimax search, adding an auxiliary greedy policy to reduce the number of branches, optimizing the exploration order for MCTS, pruning nodes, and parallelization for multiple processing. This neural network using a version of MCTS proved fairly successful, winning nearly 100\% of its matches against a normal MCTS implementation when given a set ``thinking time'' of 20 seconds. This winning percentage fluctuates if QDab goes second, but can be counterbalanced with more thinking time. + + \subsection{The Theory Behind Dots and Boxes} + + One of the most important parts of creating an AI to play a game is for the AI to know the rules of the game and the optimal moves. There exist papers from the field of game theory that solve these problems. These papers usually involve games that are already partially completed, where the question becomes what the ideal next move is. + + One such example of the techniques of optimal gameplay comes from Elwyn Berlekamp, who wrote \emph{The Dots-and-Boxes Game}. His paper~\cite{berlekamp2000} has the focal point of the game be at the point of a \emph{loony game}, or when any possible move opens up the possibility for the opponent to score multiple points in one turn by closing multiple boxes. This and the other papers on this subject assume the loony game state is one of the current player's turns. The player whose turn it is not is the player \emph{in control}. Berlekamp, in the paper, suggests that the optimal strategy for the player not in control is to keep the scores even between the players. That strategy involves an algorithm centered on opening the smallest area possible. The algorithms for both moves and their costs are then proved inductively. + + There are other approaches that are based off of Berlekamp's. One approach, by Kevin Buzzard and Michael Ciere~\cite{buzzard2014}, once again concerns the subject of loony games. This paper still assumes that one has knowledge of high-level Dots and Boxes gameplay but explains many of the terms frequently used. Once again, there is a player that is forced to make a move that will allow the opponent to score multiple boxes in one turn. The algorithm is a modified version of the one shown in~\cite{berlekamp2000}, changing the optimal move strategy slightly to prioritize one specific combination of a 3-chain over others. This paper also mentions opportunity costs of passing up on smaller chains to take advantage of larger ones if given the opportunity. The examples given range from games where there are only chains or only loops to games that are full of small chains and loops across the board. These simple examples usually have simple solutions, so the paper goes on and states theorems on different situations involving various different chains and loops and all of the strategies involved. The strategies all depend on values calculated off of the number of loops and chains on the board. These scores allow an optimal move to always be played even if the circumstances seem dire. + + One of the most recent game theory papers on Dots and Boxes is by Daniel Allcock. In his paper~\cite{allcock2019best}, he directly builds on~\cite{buzzard2014}, which in turn built off of~\cite{berlekamp2000}. It once again starts off with a loony game and builds off of the optimal moves. The optimal moves are once again based off of scores where the derivations are both in this paper and~\cite{buzzard2014}. The first expansion of the previous paper involves another base case involving the presence of different chains and loops. This base case gives another chance to the player not in control to possibly become the player in control if desired. Allcock also goes into more detail about when the player in control should retain control or relinquish it. This paper also mentions the usage of the \emph{hard-hearted handout}, a move involving the deliberate allowance of a player to close a small chain or loop while again forcing them to make another loony move. If the player in control follows the optimal strategy portrayed in this paper, and the opponent also plays optimally, the player in control will always win by a known margin~\cite{allcock2019best}. The rest of the paper is full of proofs for the theorems previously mentioned. These proofs build on each other and previous ones to definitively prove the opener strategy. One thing that Allcock does that the other papers do not involves the moves before the loony game happens. While short, it tells the player who expects to not be in control to capture as many early boxes possible to prevent the player from losing. + + \section{Approach} + The approach we took to solving this problem included first ensuring that the dots and boxes game was functioning correctly and the rules were followed as intended. Dots and Boxes can potentially have different sets of rules and it was seen while researching some existing code that these rules can have an effect on how algorithms are implemented. The rules that we defined were that the size of the grid can be variable, that each square in the grid is worth only one point, and that if a player scores a point then it is their turn again, with no limit on how many times a player can keep scoring and playing a new move. The fact that a player can go multiple times in a row is a major factor in both Monte Carlo and Minimax because it needs to factor in the possibility that it can either set up a run of scores for itself or its opponent. Each of the two algorithms have the ability to look ahead multiple moves and take into account that the next move could be their own again if they have just completed a box. + + For Minimax with Alpha Beta Pruning, the implementation involves all of the typical elements of a Minimax with Alpha Beta Pruning algorithm. When it is Minimax’s turn it retrieves the state of the game which is the most important object in all of the code. Along with this state comes helper functions that allow us to access all possible moves that can be played, and allow us to access a score given to the state for both players. The score looks at how many boxes are currently filled for each player and stores that in a dictionary keeping track for both player 1 and player 2’s score. Minimax creates a node which has the current state scored as well as a single integer score it creates by taking the difference of its opponent score and its own score. This is helpful to have one single integer compare nodes once the minimax creates a tree. The node also has a children variable which makes a dictionary correlating a tuple that gives the (row,column,orientation) coordinate of the move in question to another node that has the state as if the move correlated was played. This essentially is how the tree is created within Minimax. Minimax then continues to generate the tree and gives nodes down the tree scores that are stored within alpha or beta, if the scores are larger or smaller than the previous alpha or beta. Alpha and Beta are defined as -100 and 100 respectively because on the board sizes we are testing, scores of this value will never be reached. In our case Minimax has the ability to generate the entire game tree and find a move to play based on every possible scenario, but due to computing limitations we have added a variable that can restrict the depth Minimax searches too for moves. For all of our testing we kept the depth limited to three which essentially allows it to look three moves ahead in every possible scenario. After Minimax is done it returns the move that had the score that not only most benefited itself, but also inhibited its opponent. + + Monte Carlo Tree Search was implemented in a similar way as far as accessing the states and the helper functions within the states of the game, but MCTS had its own node class apart from Minimax. The main difference is that each node keeps track of its parent node along with the children. This was in order to easily back-propagate once ends of the trees were met. MCTS creates a root node which contains the state of the game once it is their turn. It then plays a multitude of random moves down different paths of the trees it looks for the end result of the game. Once it finds an ending of the game that is most beneficial to itself (the score differential is the highest) it back-propagates to find the move that led it down this path in the first place and returns that move. Monte Carlo search tree also has the ability to simulate every possible game state as well, but due to computing limitations and fairness to the Minimax MCTS was restricted via how many iterations do within the simulate function. This did not allow it to simulate all game states, but was still able to maintain the principles of what Monte Carlo Tree Search should do. Both Minimax and MCTS take approximately 3-5 seconds to complete their first move which showed us a pseudo fairness for when they would compete against each other. It is important to note that both of these two algorithms can be slightly modified to look further ahead into the game if the computing power was available, and would lead to better results against any opponent. + + Finally a random AI was generated as a sort of baseline opponent. This generated a list of every possible move given the game state and then picked a random one to play. In theory this AI should always be defeated, but obviously if it is simulated enough times it has potential to beat its opponent. The main goal of this project is to see if these algorithms can compete with the human brain and if so which algorithm is strongest to do so. Each algorithm played every other algorithm as well as a human. Another variable that goes along with every zero-sum game is who starts. For all the trials half of the games one player started and half of the games the other player started. It is unknown if starting is an advantage or not. We also did these simulations on different board sizes to see if the possibility of a tie affected how the AI’s reacted. For example, a 3\(\times\)3 board cannot end in a tie, but a 4\(\times\)4 board can. As far as the human trials go, all players were familiar with the rules whether from childhood or a brief explanation, but none were briefed on what algorithm they were playing or any ideal strategies of the game itself. The idea was to test the algorithms against a casual but knowledgeable player to see how the games would go. + + + \section{Data} + We conducted a number of experiments with different algorithms playing against each other. Monte Carlo Tree Search (MCTS), Minimax with alpha-beta pruning, and a random move algorithm were used and implemented using Python 3. Our program is capable of creating a board of any arbitrary \(m*n\) size. A 3\(\times\)3 board is fairly standard for a game of dots-and-boxes, but a 4\(\times\)4 provides interesting data as it is possible to result in a tie, whereas a 3\(\times\)3 game will always have a winner. For board sizes beyond a 4\(\times\)4 board, the search space dramatically increases, which causes the MCTS and A-B Search runtime to increase in turn. Therefore, we limited the scope of our experiments to 3\(\times\)3 and 4\(\times\)4 games. + + The agents were implemented by first creating a base agent class and allowing the other agents to inherit shared attributes, mainly involving communication between agents and the central server using WebSockets. WebSockets were used instead of HTTP due to its significantly reduced overhead. This reduced overhead allows the agents to communicate more efficiently, allowing the games to be faster. After implementing the WebSocket layer, the three agents were implemented according to their specifications. + + The random agent was the simplest one to implement, as the function for finding the next move is only 15 lines long. It is also by far the fastest, with games between random agents lasting a few seconds even on large grids. It simply works based off of all legal moves possible and makes a random one. This agent was primarily used for checking the implementation of both the Python backend and HTML/JavaScript frontend. It also served as a way to check the implementation of the other algorithms going against a known functioning one. + + The Monte Carlo Tree Search algorithm was the next one implemented. It inherits most of the non-move based aspects of the random agent. Our implementation gets the current state of the game, generates the game tree of all possible moves, and then traverses the tree several times to find the optimal solution. + + The last algorithm implemented was the Alpha-Beta (AB) search. Like Monte Carlo, it inherits many aspects from the random agent. It attempts to prune the amount of nodes in the minimax algorithm search tree, and proved to be the strongest algorithm for our dots and boxes game. + + In order for the agents to communicate as they played the game, a central web server was created in Python using various libraries such as logging, http.server, and socketserver. This server can be run locally on any of our machines for ease of use and so we could compare more iterations of the experiment between the three of us. An automated logger Python program was also developed so that games could be run in the background. The data collection process was further automated by writing a bash and Powershell script for UNIX-based and Windows-based operating systems. For the human trials, a basic JavaScript webpage was made allowing the player to play the game and see the AI play in real time. + + For the actual game matchups, we played combinations of all three agents, and ran trials of each agent versus a human player. For the human games we recruited four of our roommates to play 5 games against each algorithm, resulting in 20 trials per human iteration. + + + \clearpage + \begin{table}[htb] + \begin{center} + \caption{4\(\times\)4 Results} + \begin{tabular}{llccc} + \toprule + Player 1 & Player 2 & P1 Wins & P2 Wins & Ties \\ + \toprule + Random & Random & 102 & 86 & 12\\ + & MCTS & 0 & 50 & 0\\ + & AB & 0 & 50 & 0\\ + \midrule + MCTS & Random & 50 & 0 & 0\\ + & MCTS & 29 & 18 & 3\\ + & AB & 50 & 0 & 0 \\ + & Human & 14 & 4 & 2\\ + \midrule + AB & Random & 50 & 0 & 0\\ + & MCTS & 50 & 0 & 0\\ + & AB & 0 & 50 & 0\\ + & Human & 17 & 1 & 2\\ + \bottomrule + \end{tabular} + \end{center} + \end{table} + + \begin{table}[htb] + \begin{center} + \caption{3\(\times\)3 Results} + \begin{tabular}{llcc} + \toprule + Player 1 & Player 2 & P1 Wins & P2 Wins \\ + \toprule + Random & Random & 104 & 96\\ + & MCTS & 2 & 48\\ + & AB & 0 & 50\\ + \midrule + MCTS & Random & 49 & 1\\ + & MCTS & 23 & 27\\ + & AB & 0 & 50\\ + & Human & 15 & 5\\ + \midrule + AB & Random & 50 & 0\\ + & MCTS & 50 & 0\\ + & AB & 0 & 50\\ + & Human & 16 & 4\\ + \end{tabular} \\ + \end{center} + \end{table} + + \section{Analysis} + When analyzing the data collected during our experiment, the main things to look at are the validity of the implementation of our algorithms, and the strength of our implementations. These factors can all be identified by looking at the raw data of how each algorithm performs against its opponents. + + First we examine the validity of our two implementations: Minimax with Alpha-Beta Pruning (AB) and Monte Carlo Tree Search (MCTS). The main matchup that should be observed is each of the algorithms against the random picker. To try to ensure the randomness of the random algorithm, a random vs random trial was also run at a higher sample count to show that its moves are truly random, which is confirmed by an approximate 50/50 split over 200 trials. The higher sample count was chosen to confirm the randomness of both agents’ moves. The split is not exactly 50/50, but if the sample size were to increase even more the split would slowly become more evenly distributed. Now that the random search is validated, we can look at MCTS and AB against the random. MCTS won 49/50 as player 1 and 48/50 as player2 games against random on the 3\(\times\)3 board and won 50/50 games as both players on the 4\(\times\)4 board. This data shows that our implementation of MCTS almost always wins against the random agent with no logic. Looking at the three times it lost against random, it is most likely due to board size in correlation with randomness. When the board size is smaller, the random agent is more likely to pick a good move, and in these trials the random agent appears to have played a solid game. Once the board size increases, the luck goes away for the random agent and MCTS will have less of these losses to a random agent. This can validate our implementation for MCTS with an emphasis on the strength with larger board sizes. Next, looking at AB vs random, AB won all 200 games it played against the random agent across both board sizes. This strongly validated Minimax with Alpha Beta Pruning and also began the comparison between the two algorithms because AB did not have the same issues that MCTS had in the 3\(\times\)3 board size. + + To look at the strength of the algorithms, the matches that need to be looked at are each respective algorithm against the other as well as against the human player. Before these trials were run we played the algorithms against an identical agent that used the same algorithm as itself to see if either algorithm had a player 1 or player 2 bias. It can be seen that the MCTS does not really have a player 1 or player 2 bias because the games were just about equivalent for wins on both boards. For AB vs AB it can be seen that AB player 2 won every time for the matchup against the two. This makes sense because the early game is fairly arbitrary and AB never picks randomly like MCTS in the early game, the first move AB player plays is the only element of randomness that AB has. AB as player 2 never picks randomly. Because of this bias we let MCTS and AB go first during the human trials to try to get the fairest of data possible without requiring our human samples to give away too much of their time. + + First looking at the AB vs MCTS matchup for all combinations of p1 and p2, as well as board size we ran into a surprising result. AB won all of the matchups against MCTS.\@ Next we look at the AB vs human trials, AB won 16/20 on 3\(\times\)3 and won 17/20 with 2 ties on the 4\(\times\)4 board. Finally looking at MCTS vs human trials, MCTS won 15/20 games on the 3\(\times\)3 and won 14/20 with 2 ties on the 4\(\times\)4 board. + + This data was surprising because we had thought it would be fairly evenly distributed when looking at AB vs MCTS, but when looking further at the computing limitations and how those affected each algorithm it made sense on why the MCTS struggled so much against another algorithm. We designed MCTS to have the ability to fully simulate all possible move sets and endings and then play the move that had the best score based on the evaluation of the board. Since a game of dots and boxes has a fairly complex tree even while playing on a relatively small board (for example a 3\(\times\)3 board initially has 24 possible moves), MCTS would take a painfully long time to play each move. To accommodate this, we put a time limit on it so it would only be able to simulate a few branches of the tree and pick the best possible out of those. This accommodation clearly made it work against the random bot as well as be effective against the human opponent, but when it came to playing AB stood no chance. If the experiment were to be expanded MCTS would give better results with an adjustment of the time limit, but AB can also be more powerful by allowing it to further dive along the plies of the tree. For the scope of this project, it is fair to say that our AB implementation is the stronger algorithm and if an algorithm had to be picked to be expanded upon to play against a highly knowledgeable human player, AB would be our group's choice. + + \section{Conclusions} + A major way to better our experiment would be to rewrite Monte Carlo Search to improve it. As of now, AB Search often beats MCTS which has proven to be weaker in other research. To improve it, we could start by increasing the search time by orders of magnitude, and more searches can be done by making the process threaded. Giving Monte Carlo more time will allow it to run through more possible outcomes, allowing more optimal play. The downside is that it will take significantly longer to run every turn. Finding the most effective compromise between speed and quality of play would allow the game to be both challenging and not have the prospective player wait for the AI to take its turn. + + Another possible method of testing is adding larger board sizes. Larger boards will take significantly more time to run due to the increased number of simulations needed for all non-random algorithms as well as having more moves needed for a complete game. Larger games also allow larger chains to form, allowing more optimal players to win by even larger margins than before. + + Alpha-Beta can also be improved. Our implementation only predicts three moves in advance for the sake of runtime, but increasing this will allow it to play more optimally at the cost of drastically reduced efficiency. AB Search is also the algorithm that needs to be optimized more. An easy way of doing this is to make it threaded allowing multiple simulations to be done at the same time but this comes at the cost of complexity, centered around inter-process communication. + + Adding more search algorithms would also be another thing to do if given additional time and resources. One such path would be integrating a neural network. This would ease the problem of single-threaded performance in a multi-thread-dominated computing environment. Utilizing all 8+ cores of high end desktop CPUs would allow the maximum efficiency while also maximizing the performance of the AI. + + While our Monte Carlo Tree Search could be made stronger, we are able to conclude from our data that our Alpha-Beta Search implementation is remarkably strong by comparison. Furthermore, both MCTS and AB Search consistently beat human players, which fulfills our original goal. Whether these results can be extrapolated to larger boards remains to be seen, but in the case of 3\(\times\)3 and 4\(\times\)4 boards, AB Search is better than MCTS, which is better than the random agent and human players. + + \bibliography{4511proj} + \bibliographystyle{unsrt} + +\end{document} \ No newline at end of file -- cgit v1.2.3