From a4d1157a33c4da4064da7acb00a7c325ccc79b3c Mon Sep 17 00:00:00 2001
From: RossTheRoss
Date: Sun, 11 Apr 2021 11:57:06 -0500
Subject: oops

---
 csci4511w/writing4.bib | 30 ++++++++++++++++--------------
 csci4511w/writing4.tex | 40 ++++++++++++++++++++++++++++++++++++----
 2 files changed, 52 insertions(+), 18 deletions(-)

diff --git a/csci4511w/writing4.bib b/csci4511w/writing4.bib
index 095e76c..974b1be 100644
--- a/csci4511w/writing4.bib
+++ b/csci4511w/writing4.bib
@@ -29,17 +29,7 @@
     VOLUME = {},
     YEAR = {2015}
 }
-@inproceedings{CCDC2019,
-    AUTHOR = {Y. {Zhang} and S. {Li} and X. {Xiong}},
-    BOOKTITLE = {2019 Chinese Control And Decision Conference (CCDC)},
-    DOI = {10.1109/CCDC.2019.8833043},
-    NUMBER = {},
-    PAGES = {6319-6322},
-    TITLE = {A Study on the Game System of Dots and Boxes Based on Reinforcement Learning},
-    VOLUME = {},
-    YEAR = {2019}
-}
-@article{Cornell2020,
+@article{Cornell1998,
     author = {Lex Weaver and Terry Bossomaier},
     title = {Evolution of Neural Networks to Play the Game of Dots-and-Boxes},
@@ -61,7 +51,6 @@
     year={2012},
     month={July}
 }
-
 @inproceedings{berlekamp2000,
     AUTHOR = {Berlekamp, Elwyn and Scott, Katherine},
     BOOKTITLE = {More Games of No Chance, Proc. MSRI Workshop on Combinatorial Games},
@@ -69,7 +58,6 @@
     TITLE = {Forcing your opponent to stay in control of a loony dots-and-boxes endgame},
     YEAR = {2000}
 }
-
 @misc{buzzard2014,
     ARCHIVEPREFIX = {arXiv},
     AUTHOR = {Kevin Buzzard and Michael Ciere},
@@ -77,4 +65,18 @@
     PRIMARYCLASS = {math.CO},
     TITLE = {Playing simple loony dots and boxes endgames optimally},
     YEAR = {2014}
-}
\ No newline at end of file
+}
+@misc{allcock2019best,
+    ARCHIVEPREFIX = {arXiv},
+    AUTHOR = {Daniel Allcock},
+    EPRINT = {1811.10747},
+    PRIMARYCLASS = {math.CO},
+    TITLE = {Best play in Dots and Boxes endgames},
+    YEAR = {2019}
+}
+@misc{wlson_2002,
+    AUTHOR = {Wilson, David},
+    JOURNAL = {Dots-and-Boxes Analysis Index},
+    URL = {https://wilson.engr.wisc.edu/boxes/},
+    YEAR = {2002}
+}
\ No newline at end of file
diff --git a/csci4511w/writing4.tex b/csci4511w/writing4.tex
index 2e559ef..19031c6 100644
--- a/csci4511w/writing4.tex
+++ b/csci4511w/writing4.tex
@@ -1,11 +1,43 @@
-\documentclass{article}
-\usepackage{natbib}
+\documentclass[12pt]{article}
+\usepackage{fullpage}\usepackage{indentfirst}
 \title{Writing 4}
 \author{Jack Vehmeier, Andrea Smith, Matt Strapp}
-\date{2021--03--30}
+\date{2021/03/30}
 \begin{document}
 \maketitle
-    \bibliographystyle{unsrt}
+    \section*{Search Algorithms (Jack)}
+    Implementing AI for a non-trivial game such as Dots and Boxes requires knowledge of how AI has been implemented for a variety of other games. Once we see how algorithms are adapted to play different kinds of games efficiently, we can design an algorithm of our own that plays Dots and Boxes as close to perfectly as possible.
+
+    A general algorithm called AlphaZero creates a game-playing agent that combines a probability distribution over moves with Monte Carlo Tree Search (MCTS) simulations to play a variety of games at a high level~\cite{GeneralGame}. Goldwaser and Thielscher discuss how MCTS can run into memory problems when games grow large enough. If we implement a way to vary the board size, memory will become an issue as the board grows. One fix is to keep a copy of the tree that tracks only the state changes of the game in question; consulting this copy costs an extra \(O(\log n)\) memory per state change~\cite{GeneralGame}. Simple fixes to memory issues like this will be important to incorporate into our own algorithms for an unbeatable Dots and Boxes AI. As AlphaZero shows, MCTS can be applied to a wide variety of games, but before implementing it we must understand its strengths and drawbacks and decide whether it fits our game at all. MCTS works best in games of finite length in which players make a single move per turn. It navigates the game tree in four repeated phases: selection of a node to explore, expansion of that node, simulation of an outcome from the new position, and backpropagation of the result up the tree, which together steer the AI toward the most promising path to a win~\cite{MCTS}.
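+
+    To make the four phases concrete, the sketch below is our own minimal illustration of MCTS (not code from~\cite{GeneralGame} or~\cite{MCTS}). It plays a toy take-away game, where players remove one or two sticks and whoever takes the last stick wins, as a stand-in for Dots and Boxes, whose rules would slot in behind the same three helper functions.
+\begin{verbatim}
+import math, random
+
+def legal_moves(state):        # state = (sticks_left, player_to_move)
+    pile, _ = state
+    return [m for m in (1, 2) if m <= pile]
+
+def apply_move(state, move):
+    pile, player = state
+    return (pile - move, -player)
+
+def winner(state):             # whoever took the last stick wins
+    pile, player = state
+    return -player if pile == 0 else None
+
+class Node:
+    def __init__(self, state, parent=None):
+        self.state, self.parent = state, parent
+        self.children = {}                    # move -> Node
+        self.visits, self.wins = 0, 0.0
+        self.untried = legal_moves(state)
+
+def uct_child(node, c=1.4):    # UCT rule used during selection
+    return max(node.children.values(),
+               key=lambda ch: ch.wins / ch.visits
+               + c * math.sqrt(math.log(node.visits) / ch.visits))
+
+def mcts(root_state, iterations=5000):
+    root = Node(root_state)
+    for _ in range(iterations):
+        node = root
+        while not node.untried and node.children:    # 1. selection
+            node = uct_child(node)
+        if node.untried:                             # 2. expansion
+            move = node.untried.pop()
+            node.children[move] = Node(apply_move(node.state, move),
+                                       parent=node)
+            node = node.children[move]
+        state = node.state                           # 3. simulation
+        while winner(state) is None:
+            state = apply_move(state, random.choice(legal_moves(state)))
+        won = winner(state)
+        while node:                                  # 4. backpropagation
+            node.visits += 1
+            if won != node.state[1]:  # win for the player who moved in
+                node.wins += 1
+            node = node.parent
+    return max(root.children.items(), key=lambda kv: kv[1].visits)[0]
+
+print(mcts((10, 1)))   # from 10 sticks the winning move is to take 1
+\end{verbatim}
+    Dots and Boxes would additionally need the extra-turn rule handled explicitly, which is exactly the complication raised below.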
+
+    While the Universiteit Maastricht paper states that MCTS can be implemented effectively for classic board games, one possible issue is that in Dots and Boxes a player who completes a box immediately moves again. MCTS should be modifiable to accommodate this, but another algorithm such as minimax with alpha-beta pruning should be explored as well to see whether it suits Dots and Boxes better. Minimax is an algorithm that uses two agents (Min and Max) to navigate a game tree and evaluate states in order to find an optimal move~\cite{Minimax}. The Dalian Polytechnic University paper argues that minimax is well suited to a game like Connect 4 because, by the very nature of the game, some nodes have no impact on the final outcome; such nodes can be pruned away with alpha-beta.
+
+    \section*{Already Existing Methods (Andrea)}
+
+    A question that arose during brainstorming is how an AI should pick which lines to fill in the early stage of Dots and Boxes, when no move seems better than any other. Such a move may look random, but a heuristic can be built by simulating many games, recording which early lines tend to appear in winning play, and prioritizing those lines during the opening~\cite{Minimax}.
+
+    In the existing scholarly research, computer scientists appear to approach Dots and Boxes in two ways: creating an agent that solves a pre-existing problem (i.e.\ predicting the outcome of a partially filled-in board) or creating an agent that evaluates moves at each state of the game. Because our project focuses on the latter type, we will discuss agents that make move recommendations at each stage.
+
+    A number of strong Dots and Boxes agents already exist. One such agent was developed by David Wilson in 2010~\cite{Barker_Korf_2012}. The solver works backwards: for each stage it starts at the end of the game with all lines filled in, then proceeds to the stage with one fewer line filled and computes a score for each possible move. Moves receive a positive score if they form one or two boxes and a negative score if they form none. It then moves to the stage with two fewer lines filled in, repeating until it reaches the original position, at which point it has scored every possible move and keeps the best one. Wilson has used this program to solve previously unsolved problems from Elwyn Berlekamp's well-known \emph{The Dots-and-Boxes Game}. For a board with \(p\) lines, this approach generates \(2^p\) states per stage, far fewer than the \(p!\) move orderings a more naive solution such as depth-first search would have to explore. The greatest pitfall of Wilson's solver is its memory and time requirement, especially for non-square boards: a 4$\times$5 board problem would need 8 terabytes of disk space, 9 gigabytes of RAM, and a run of roughly 130 days before being solved~\cite{wlson_2002}.
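+
+    The gap between \(2^p\) and \(p!\) is easy to underestimate, so the following back-of-the-envelope script (our own check, not Wilson's code) counts the line segments on a few board sizes and compares the two growth rates.
+\begin{verbatim}
+from math import factorial
+
+def num_lines(rows, cols):
+    # An r x c board (measured in boxes) has r*(c+1) vertical
+    # and c*(r+1) horizontal line segments.
+    return rows * (cols + 1) + cols * (rows + 1)
+
+for rows, cols in [(3, 3), (4, 4), (4, 5)]:
+    p = num_lines(rows, cols)
+    print(f"{rows}x{cols}: p = {p:2d} lines, "
+          f"2^p = {float(2 ** p):.2e}, p! = {float(factorial(p)):.2e}")
+\end{verbatim}
+    Even for the 4$\times$5 board (\(p = 49\)), \(2^p\) is on the order of \(10^{14}\) while \(p!\) is on the order of \(10^{62}\), which is why Wilson's enumeration is feasible at all despite its heavy disk and memory appetite.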
+
+    Therefore, while Wilson's solver is an excellent implementation for solving difficult pre-existing problems, there exist faster and less memory-intensive Dots and Boxes agents. In fact, the agent created by Joseph Barker and Richard Korf in 2012 was efficient enough to solve the 4$\times$5 board problem~\cite{Barker_Korf_2012}. The agent uses alpha-beta search as its main solving method and takes advantage of several other strategies to minimize unnecessary repetition and the number of nodes explored on the way to an optimal solution. The first of these strategies is to reason about chains, where several boxes are connected into a ``tube'' with one or both ends open. When the solver encounters a chain, a preprocessing step captures all provably optimal boxes. If the resulting next move is a hard-hearted handout (that is, a set of two boxes capturable by a single line), the player to move will either capture the handout and be forced to move next or leave it for the opponent to capture. In doing this, the solver essentially considers a single state with a different player to move, eliminating the need to produce another state in the problem space. The agent also employs a transposition table that stores explored states with their associated minimax values: if a repeated state is encountered, its value is retrieved from memory instead of being recomputed, cutting the time spent on duplicate states. The agent likewise recognizes that moves on symmetrical board spaces yield the same minimax value and can therefore choose a move without considering both. Lastly, the agent applies a move-ordering heuristic while traversing the children of a node in alpha-beta search, examining better moves earlier so that the endgame search bounds narrow sooner. Using these strategies, the agent solved the previously unsolved 4$\times$5 board in 10 days, determining that with optimal play on both sides the 4$\times$5 game always ends in a tie.
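+
+    The sketch below (again our own illustration rather than Barker and Korf's code) shows how alpha-beta pruning, a transposition table, and the extra-turn rule fit together by exhaustively solving the tiny 1$\times$2 board; the real agent's chain preprocessing, symmetry detection, and move ordering are omitted.
+\begin{verbatim}
+# Lines of the 1x2 board are numbered 0..6; each box is the
+# frozenset of its four lines (line 3 is the shared middle edge).
+BOXES = (frozenset({0, 2, 3, 5}), frozenset({1, 3, 4, 6}))
+ALL = frozenset(range(7))
+LOWER, EXACT, UPPER = -1, 0, 1
+TABLE = {}            # (drawn, diff) -> (bound flag, value)
+
+def solve(drawn, diff, alpha=-3, beta=3):
+    """Best (own boxes - opponent boxes) for the player to move."""
+    if drawn == ALL:
+        return diff
+    key, a0 = (drawn, diff), alpha
+    if key in TABLE:                       # transposition table probe
+        flag, val = TABLE[key]
+        if flag == EXACT:
+            return val
+        if flag == LOWER:
+            alpha = max(alpha, val)
+        else:
+            beta = min(beta, val)
+        if alpha >= beta:
+            return val
+    best = -3
+    for line in sorted(ALL - drawn):
+        new = drawn | {line}
+        made = sum(1 for b in BOXES if line in b and b <= new)
+        if made:     # box completed: the same player moves again
+            val = solve(new, diff + made, alpha, beta)
+        else:        # turn passes: negamax sign flip, window swapped
+            val = -solve(new, -diff, -beta, -alpha)
+        best = max(best, val)
+        alpha = max(alpha, val)
+        if alpha >= beta:                  # beta cutoff
+            break
+    flag = UPPER if best <= a0 else LOWER if best >= beta else EXACT
+    TABLE[key] = (flag, best)
+    return best
+
+print(solve(frozenset(), 0))   # optimal first-player margin
+\end{verbatim}
+    The search reports a margin of 0: with perfect play the 1$\times$2 board is a 1--1 tie, a miniature echo of the drawn 4$\times$5 result above. Storing a bound flag with each entry is what keeps the transposition table sound in the presence of alpha-beta cutoffs.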
+
+    While the literature on Dots and Boxes is relatively scarce compared to other areas of computer science, some research has been done on improving existing techniques for solving the game. Neural networks, while slightly outside the scope of our project, are an important advancement in game-playing research. One preliminary attempt at applying neural networks to Dots and Boxes was an agent created in 1998 by Lex Weaver and Terry Bossomaier. They assert that some game-playing agents, despite being of grandmaster rank, sometimes mishandle problems that human novices solve easily~\cite{Cornell1998}. Such issues could be addressed by giving the agent feedback on which games it wins and loses, so they used a genetic algorithm to weight their network and pitted it against heuristic-driven players as a benchmark. While the network did improve when playing against lower-level agents, it struggled to learn strategies beyond randomly creating boxes. To advance to higher-level strategies, such as avoiding filling in the third side of a box (which gives the opponent a free box), the program's architecture would have required a sparser neural network.
+
+    More recent research has been more successful, such as QDab, an MCTS-based agent created in 2015. QDab improves upon MCTS by using a neural network, rather than a random policy, to select moves. The network predicts the winning probability at each stage; its training data consisted of randomly generated board states on which minimax search was performed to find the resulting score under optimal play by both opponents. These random boards were generated as mid-game positions, since the early-game winning probability in Dots and Boxes is extremely complex to compute. The agent also uses other strategies that have not necessarily been proven correct, such as stopping a simulation at any board state whose winning strategy is easily found with minimax search, adding an auxiliary greedy policy to reduce the number of branches, optimizing the exploration order for MCTS, pruning nodes, and parallelizing across multiple processors. This neural-network-guided MCTS proved fairly successful, winning nearly 100\% of its matches against a plain MCTS implementation when given a set ``thinking time'' of 20 seconds. The winning percentage drops when QDab goes second, but that disadvantage can be counterbalanced with more thinking time.
+
+    \section*{The Theory Behind Dots and Boxes (Matt)}
+
+    One of the most important parts of creating an AI to play a game is giving it knowledge of the rules and of the optimal moves. Papers from the field of game theory solve exactly these problems. They usually analyze games that are already partially completed, where the question becomes what the ideal next move is.
+
+    One example of such an analysis comes from Elwyn Berlekamp, who wrote \emph{The Dots-and-Boxes Game}. His paper~\cite{berlekamp2000} centers on the \emph{loony game}: the point at which every possible move lets the opponent score multiple points in one turn by closing multiple boxes. This paper and the others on the subject examine the position from the side of the player forced to move in the loony state; the player whose turn it is not is said to be \emph{in control}. Berlekamp suggests that the optimal strategy for the player not in control is to keep the scores as even as possible, using an algorithm centered on opening the smallest region available. The algorithms for both players' moves and their costs are then proved correct inductively.
+
+    Other approaches build on Berlekamp's. One, by Kevin Buzzard and Michael Ciere~\cite{buzzard2014}, again concerns loony games. The paper assumes familiarity with high-level Dots and Boxes play but explains many of the frequently used terms. Once again, one player is forced to make a move that allows the opponent to score multiple boxes in one turn. The algorithm is a modified version of the one shown in~\cite{berlekamp2000}, adjusting the optimal move strategy slightly to prioritize one specific configuration of a 3-chain over others. The paper also examines the opportunity cost of passing up smaller chains to take advantage of larger ones when given the opportunity. Its examples range from games containing only chains or only loops to games full of small chains and loops across the board. These simple examples usually have simple solutions, so the paper goes on to state theorems about situations involving various combinations of chains and loops and the strategies for each. The strategies all depend on values calculated from the number of loops and chains on the board; these values let an optimal move be found even when the circumstances seem dire.
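+
+    The flavor of these chain-and-loop calculations can be seen in the standard ``all but two'' bookkeeping. The sketch below is our own illustration of that well-known rule rather than a scoring function from~\cite{berlekamp2000} or~\cite{buzzard2014}: it tallies the controlling player's score if they keep control to the very end, declining the last two boxes of every chain and the last four boxes of every loop, except for the final component, which they take whole; it assumes the loops are opened before the final chain.
+\begin{verbatim}
+def keep_control_score(chains, loops):
+    """chains: lengths (>= 3) of the long chains on the board;
+    loops: sizes (>= 4) of the loops. Returns the pair
+    (controller's boxes, opener's boxes)."""
+    total = sum(chains) + sum(loops)
+    sacrificed = 2 * len(chains) + 4 * len(loops)
+    sacrificed -= 2 if chains else 4   # last component taken whole
+    return total - sacrificed, sacrificed
+
+# Three chains and one loop: the controller scores 7, the opener 8,
+# so keeping control does not guarantee a win by itself.
+print(keep_control_score([3, 3, 5], [4]))
+\end{verbatim}
+    Values of this kind are what the theorems in~\cite{buzzard2014} weigh when deciding whether the player in control should keep control or cash in early.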
+
+    One of the most recent game theory papers on Dots and Boxes is by Daniel Allcock~\cite{allcock2019best}, which directly builds on~\cite{buzzard2014}, which in turn built on~\cite{berlekamp2000}. It once again starts from a loony game and derives the optimal moves, which are again based on scores whose derivations appear both in this paper and in~\cite{buzzard2014}. The first extension of the previous paper adds another base case covering particular combinations of chains and loops; this base case gives the player not in control another chance to become the player in control, if desired. Allcock also goes into more detail about when the player in control should retain control and when they should relinquish it. The paper likewise discusses the \emph{hard-hearted handout}, a move that deliberately allows the opponent to close a small chain or loop while forcing them to make another loony move afterwards. If the player in control follows the optimal strategy given in the paper and the opponent also plays optimally, the player in control will always win by a known margin~\cite{allcock2019best}. The rest of the paper consists of proofs of the theorems mentioned above, each building on the previous ones to definitively establish the strategy. One thing Allcock covers that the other papers do not is the play before the loony game arises: a short section advising the player who expects not to be in control to capture as many early boxes as possible to avoid losing.
+    \bibliography{writing4}
+    \bibliographystyle{unsrt}
 \end{document}
\ No newline at end of file
--
cgit v1.2.3