From 1337de59b712c30c447b2b45de9ec54ecd6d8ea4 Mon Sep 17 00:00:00 2001
From: Matt Strapp
Date: Fri, 18 Feb 2022 12:41:51 -0600
Subject: Hopefully get the system able to be packaged

Signed-off-by: Matt Strapp
---
 System/Pendulum/__init__.py              |   3 +
 System/Pendulum/encoder.py               |  77 ++++++
 System/Pendulum/errorTestScript.py       |  42 ++++
 System/Pendulum/homework8.ipynb          | 350 +++++++++++++++++++++++++++
 System/Pendulum/initialize_system.py     |  18 ++
 System/Pendulum/motor.py                 |  76 ++++++
 System/Pendulum/results.csv              | 252 ++++++++++++++++++++
 System/Pendulum/swingUp.py               | 207 ++++++++++++++++
 System/Pendulum/system.py                | 373 +++++++++++++++++++++++++++++
 System/Pendulum/system_swingup_test.py   | 293 +++++++++++++++++++++++
 System/Pendulum/system_swingup_test_2.py | 395 +++++++++++++++++++++++++++++++
 11 files changed, 2086 insertions(+)
 create mode 100644 System/Pendulum/__init__.py
 create mode 100644 System/Pendulum/encoder.py
 create mode 100644 System/Pendulum/errorTestScript.py
 create mode 100644 System/Pendulum/homework8.ipynb
 create mode 100644 System/Pendulum/initialize_system.py
 create mode 100644 System/Pendulum/motor.py
 create mode 100644 System/Pendulum/results.csv
 create mode 100644 System/Pendulum/swingUp.py
 create mode 100644 System/Pendulum/system.py
 create mode 100644 System/Pendulum/system_swingup_test.py
 create mode 100644 System/Pendulum/system_swingup_test_2.py

diff --git a/System/Pendulum/__init__.py b/System/Pendulum/__init__.py
new file mode 100644
index 0000000..510ffea
--- /dev/null
+++ b/System/Pendulum/__init__.py
@@ -0,0 +1,3 @@
+from .motor import Motor
+from .system import System
+from .encoder import Encoder
\ No newline at end of file
diff --git a/System/Pendulum/encoder.py b/System/Pendulum/encoder.py
new file mode 100644
index 0000000..ece7fd1
--- /dev/null
+++ b/System/Pendulum/encoder.py
@@ -0,0 +1,77 @@
+# Import required modules
+import RPi.GPIO as GPIO
+import time
+import math
+
+# Constants: parameters that the caller cannot modify
+# Delay: minimum delay necessary after pulling a pin low before the input can be read
+delay = 0.0000005
+
+# Encoder Class
+# This reads the absolute encoder on the given IO pins
+class Encoder:
+    def __init__(self, clk_pin, cs_pin, data_pin):
+        # Set the board IO (just in case it hasn't been done yet)
+        GPIO.setmode(GPIO.BCM)
+        # Setup class variables
+        self.offset = 0
+        self.clk_pin = clk_pin
+        self.cs_pin = cs_pin
+        self.data_pin = data_pin
+        # Setup the IO
+        try:
+            GPIO.setup(self.clk_pin, GPIO.OUT)
+            GPIO.setup(self.cs_pin, GPIO.OUT)
+            GPIO.setup(self.data_pin, GPIO.IN)
+            # Set the CS and CLK lines high
+            GPIO.output(self.clk_pin, 1)
+            GPIO.output(self.cs_pin, 1)
+        except:
+            # If this fails, it is likely because the IO has already been configured (the encoders share some pins), so ignore the failure
+            time.sleep(0.01) # Used just to have something in the exception handler
+            #print("ERROR. 
Unable to setup the configuration required") + # Wait some time to before reading + time.sleep(0.5) + def set_zero(self, offset = 0): + # Take current position as zero + pos = self.read_position('Raw') + self.offset = (self.read_position('Raw') + self.offset + offset) % 1024 + def clockup(self): + GPIO.output(self.clk_pin, 1) + def clockdown(self): + GPIO.output(self.clk_pin, 0) + def read_position(self, format): + # Most of this is based of timing diagram of encoder + # Pull CS low to start reading + GPIO.output(self.cs_pin, 0) + # Delay necessary before reading is ready + time.sleep(delay*2) + data = 0 + # Clockdown necessary before reading + self.clockdown() + # Go through 10 bits needed to read + for i in range(0, 10): + # Clock up to start reading one bit + self.clockup() + # Shift data left and insert input + data <<= 1 + data |= GPIO.input(self.data_pin) + # Clock down after finish reading + self.clockdown() + # Pull CS high after finish reading + GPIO.output(self.cs_pin, 1) + # Format with offset, Max is 1024 + data = (data - self.offset) % 1024 + # Data is linearly mapped + if format=="Raw": + return data + elif format == "Degrees": + degrees = (data/1024.0) * 360.0 + return degrees + elif format == "Radians": + radians = (data/1024.0) * (2.0*math.pi) + return radians + else: + print("ERROR. Invalid format (Raw, Degrees, Radians)") + return None + diff --git a/System/Pendulum/errorTestScript.py b/System/Pendulum/errorTestScript.py new file mode 100644 index 0000000..cb029d5 --- /dev/null +++ b/System/Pendulum/errorTestScript.py @@ -0,0 +1,42 @@ + +import sys +sys.path.insert(0, '/home/pi/pendulum/System') +from System.system import System +import time +from sys import exit +#import pandas + + +sys = System(angular_units = 'Radians') + +for x in range(0,10: + angle, linear = sys.measure() + print("Angle: " + str(angle) + ", Linear: " + str(linear)) + sys.add_results(linear, angle, angle) + time.sleep(0.2) + + +exit() + + + + + + + + +#class test(): +# def __init__(self, x, theta): +# self.x = 0 +# self.theta = 0 +# +# def getINFO(self, theta): +# #theta, x = self.System.measure() +# theta, x = self.sys.measure() +# +#while(1): +# test.getINFO() +# print("t") +# #print("theta",test.theta) +# #print("x",test.x) +# \ No newline at end of file diff --git a/System/Pendulum/homework8.ipynb b/System/Pendulum/homework8.ipynb new file mode 100644 index 0000000..a9c28c9 --- /dev/null +++ b/System/Pendulum/homework8.ipynb @@ -0,0 +1,350 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import gym\n", + "import swingUp\n", + "import numpy as np\n", + "import numpy.random as rnd\n", + "import torch as pt\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# The Swing-Up Problem\n", + "\n", + "In this homework we will study a classic problem of swinging up a pendulum and keeping it balanced. In this problem, forces in the range $[-1,1]$ are applied to a cart with a pendulum on it. The episode ends if one of the boundaries is reached or the pendulum swings too fast. Otherwise, it just keeps going. You recieve a reward of 1 any time the pendulum is above horizontal. You recieve a negative cost if the episode ends becauseof hitting the boundary or swinging too fast.\n", + "\n", + "The code below demonstrates a basic SARSA method with a neural network for approximating the $Q$-function. 
This system has continuous observation and action spaces. However, for simplicity and applicability of the basic SARSA method, we are only using two actions." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "class nnQ(pt.nn.Module):\n", + " \"\"\"\n", + " Here is a basic neural network with for representing a policy \n", + " \"\"\"\n", + " \n", + " def __init__(self,stateDim,numActions,numHiddenUnits,numLayers):\n", + " super().__init__()\n", + " \n", + " InputLayer = [pt.nn.Linear(stateDim+numActions,numHiddenUnits),\n", + " pt.nn.ReLU()]\n", + " \n", + " HiddenLayers = []\n", + " for _ in range(numLayers-1):\n", + " HiddenLayers.append(pt.nn.Linear(numHiddenUnits,numHiddenUnits))\n", + " HiddenLayers.append(pt.nn.ReLU())\n", + " \n", + " \n", + " OutputLayer = [pt.nn.Linear(numHiddenUnits,1)]\n", + " \n", + " AllLayers = InputLayer + HiddenLayers + OutputLayer\n", + " self.net = pt.nn.Sequential(*AllLayers)\n", + " \n", + " self.numActions = numActions\n", + " \n", + " def forward(self,x,a):\n", + " x = pt.tensor(x,dtype=pt.float32)\n", + "\n", + " b = pt.nn.functional.one_hot(pt.tensor(a),self.numActions)\n", + " \n", + " c = b.float().detach()\n", + " y = pt.cat([x,c])\n", + " \n", + " return self.net(y)\n", + " \n", + " \n", + "class sarsaAgent:\n", + " def __init__(self,stateDim,numActions,numHiddenUnits,numLayers,\n", + " epsilon=.1,gamma=.9,alpha=.1):\n", + " self.Q = nnQ(stateDim,numActions,numHiddenUnits,numLayers)\n", + " self.gamma = gamma\n", + " self.epsilon = epsilon\n", + " self.alpha = alpha\n", + " self.numActions = numActions\n", + " self.s_last = None\n", + " def action(self,x):\n", + " # This is an epsilon greedy selection\n", + " if rnd.rand() < self.epsilon:\n", + " a = rnd.randint(numActions)\n", + " else:\n", + " qBest = -np.inf\n", + " for aTest in range(self.numActions):\n", + " qTest = self.Q(x,aTest).detach().numpy()[0]\n", + " if qTest > qBest:\n", + " qBest = qTest\n", + " a = aTest\n", + " return a\n", + " \n", + " def update(self,s,a,r,s_next,done):\n", + " \n", + " # Compute the TD error, if there is enough data\n", + " update = True\n", + " if done:\n", + " Q_cur = self.Q(s,a).detach().numpy()[0]\n", + " delta = r - Q_cur\n", + " self.s_last = None\n", + " Q_diff = self.Q(s,a)\n", + " elif self.s_last is not None:\n", + " Q_next = self.Q(s,a).detach().numpy()[0]\n", + " Q_cur = self.Q(self.s_last,self.a_last).detach().numpy()[0]\n", + " delta = self.r_last + self.gamma * Q_next - Q_cur\n", + " Q_diff = self.Q(self.s_last,self.a_last)\n", + " else:\n", + " update = False\n", + " \n", + " # Update the parameter via the semi-gradient method\n", + " if update:\n", + " self.Q.zero_grad()\n", + " Q_diff.backward()\n", + " for p in self.Q.parameters():\n", + " p.data.add_(self.alpha*delta,p.grad.data)\n", + " \n", + " \n", + " \n", + " if not done:\n", + " self.s_last = np.copy(s)\n", + " self.a_last = np.copy(a)\n", + " self.r_last = np.copy(r)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# The Simulation\n", + "\n", + "Here is the simulation. It takes about an hour to run this (at least on my computer). I suggest you make yourself a very complex, luxurious sandwich while you wait. Alternatively, just decrease `maxSteps` to a more civilised value.\n", + "\n", + "It doesn't learn the task perfectly, but it will do pretty well for stretches. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Episode: 1 Total Steps: 705 , Ave. Reward: -0.7323210705117651 , Episode Length: 705 Max Up-Time: 54\n", + "Episode: 2 Total Steps: 1087 , Ave. Reward: -1.386005534482438 , Episode Length: 382 Max Up-Time: 38\n", + "Episode: 3 Total Steps: 1563 , Ave. Reward: -0.757185810040358 , Episode Length: 476 Max Up-Time: 52\n", + "Episode: 4 Total Steps: 1871 , Ave. Reward: -2.3208087438993275 , Episode Length: 308 Max Up-Time: 86\n", + "Episode: 5 Total Steps: 2193 , Ave. Reward: -2.1598192165326444 , Episode Length: 322 Max Up-Time: 33\n", + "Episode: 6 Total Steps: 2855 , Ave. Reward: -0.7392412280730795 , Episode Length: 662 Max Up-Time: 45\n", + "Episode: 7 Total Steps: 3307 , Ave. Reward: -1.3685011673110579 , Episode Length: 452 Max Up-Time: 60\n", + "Episode: 8 Total Steps: 3893 , Ave. Reward: -0.745503768027489 , Episode Length: 586 Max Up-Time: 27\n", + "Episode: 9 Total Steps: 4243 , Ave. Reward: -1.8577023916291222 , Episode Length: 350 Max Up-Time: 44\n", + "Episode: 10 Total Steps: 4510 , Ave. Reward: -0.7272320071143903 , Episode Length: 267 Max Up-Time: 29\n", + "Episode: 11 Total Steps: 5193 , Ave. Reward: -0.36655536116630455 , Episode Length: 683 Max Up-Time: 39\n", + "Episode: 12 Total Steps: 6005 , Ave. Reward: -0.2880664852587831 , Episode Length: 812 Max Up-Time: 57\n", + "Episode: 13 Total Steps: 6476 , Ave. Reward: -1.9700554803843828 , Episode Length: 471 Max Up-Time: 40\n", + "Episode: 14 Total Steps: 6654 , Ave. Reward: -3.5124131530851304 , Episode Length: 178 Max Up-Time: 33\n", + "Episode: 15 Total Steps: 7120 , Ave. Reward: -1.0719102072420077 , Episode Length: 466 Max Up-Time: 30\n", + "Episode: 16 Total Steps: 7385 , Ave. Reward: -1.2138522089615815 , Episode Length: 265 Max Up-Time: 30\n", + "Episode: 17 Total Steps: 7890 , Ave. Reward: -0.9911817636578898 , Episode Length: 505 Max Up-Time: 34\n", + "Episode: 18 Total Steps: 8348 , Ave. Reward: -0.24673268477366056 , Episode Length: 458 Max Up-Time: 24\n", + "Episode: 19 Total Steps: 8885 , Ave. Reward: -0.19901803972570822 , Episode Length: 537 Max Up-Time: 34\n", + "Episode: 20 Total Steps: 9257 , Ave. Reward: -1.0397485980722023 , Episode Length: 372 Max Up-Time: 47\n", + "Episode: 21 Total Steps: 9593 , Ave. Reward: -1.9637917520715307 , Episode Length: 336 Max Up-Time: 62\n", + "Episode: 22 Total Steps: 10146 , Ave. Reward: -1.9249864843211308 , Episode Length: 553 Max Up-Time: 37\n", + "Episode: 23 Total Steps: 10413 , Ave. Reward: -2.4530561661945107 , Episode Length: 267 Max Up-Time: 45\n", + "Episode: 24 Total Steps: 11569 , Ave. Reward: -0.6792904495051211 , Episode Length: 1156 Max Up-Time: 78\n", + "Episode: 25 Total Steps: 11982 , Ave. Reward: -0.8938237978597035 , Episode Length: 413 Max Up-Time: 39\n", + "Episode: 26 Total Steps: 12243 , Ave. Reward: -1.2015515630784517 , Episode Length: 261 Max Up-Time: 20\n", + "Episode: 27 Total Steps: 12419 , Ave. Reward: -2.887021412838564 , Episode Length: 176 Max Up-Time: 34\n", + "Episode: 28 Total Steps: 13159 , Ave. Reward: -0.6110313146967027 , Episode Length: 740 Max Up-Time: 37\n", + "Episode: 29 Total Steps: 13589 , Ave. Reward: -1.928537880786853 , Episode Length: 430 Max Up-Time: 75\n", + "Episode: 30 Total Steps: 14149 , Ave. Reward: -2.057777774292444 , Episode Length: 560 Max Up-Time: 33\n", + "Episode: 31 Total Steps: 14662 , Ave. 
Reward: -1.1384388629314761 , Episode Length: 513 Max Up-Time: 75\n", + "Episode: 32 Total Steps: 15058 , Ave. Reward: -0.7835031778339514 , Episode Length: 396 Max Up-Time: 31\n", + "Episode: 33 Total Steps: 15649 , Ave. Reward: -0.8815494773451172 , Episode Length: 591 Max Up-Time: 57\n", + "Episode: 34 Total Steps: 16393 , Ave. Reward: -1.2573853074557841 , Episode Length: 744 Max Up-Time: 44\n", + "Episode: 35 Total Steps: 16920 , Ave. Reward: -1.7520053641436282 , Episode Length: 527 Max Up-Time: 60\n", + "Episode: 36 Total Steps: 17557 , Ave. Reward: -1.3575707923649754 , Episode Length: 637 Max Up-Time: 29\n", + "Episode: 37 Total Steps: 18229 , Ave. Reward: -1.1195498770809882 , Episode Length: 672 Max Up-Time: 53\n", + "Episode: 38 Total Steps: 18618 , Ave. Reward: -3.222163638956208 , Episode Length: 389 Max Up-Time: 35\n", + "Episode: 39 Total Steps: 19276 , Ave. Reward: -0.5477558848181898 , Episode Length: 658 Max Up-Time: 65\n", + "Episode: 40 Total Steps: 19804 , Ave. Reward: -0.6584685777265661 , Episode Length: 528 Max Up-Time: 59\n", + "Episode: 41 Total Steps: 20571 , Ave. Reward: -0.4637257282420204 , Episode Length: 767 Max Up-Time: 66\n", + "Episode: 42 Total Steps: 21002 , Ave. Reward: -1.0716300038503115 , Episode Length: 431 Max Up-Time: 34\n", + "Episode: 43 Total Steps: 21892 , Ave. Reward: -0.5765091241222032 , Episode Length: 890 Max Up-Time: 43\n", + "Episode: 44 Total Steps: 22898 , Ave. Reward: -0.014668360500057598 , Episode Length: 1006 Max Up-Time: 82\n", + "Episode: 45 Total Steps: 23543 , Ave. Reward: -0.9241536579523029 , Episode Length: 645 Max Up-Time: 83\n", + "Episode: 46 Total Steps: 23871 , Ave. Reward: -1.0800045254850208 , Episode Length: 328 Max Up-Time: 54\n", + "Episode: 47 Total Steps: 24746 , Ave. Reward: -0.4107819625154534 , Episode Length: 875 Max Up-Time: 55\n", + "Episode: 48 Total Steps: 25088 , Ave. Reward: -1.2879242748541877 , Episode Length: 342 Max Up-Time: 33\n", + "Episode: 49 Total Steps: 26200 , Ave. Reward: -0.26045605232327973 , Episode Length: 1112 Max Up-Time: 41\n", + "Episode: 50 Total Steps: 26444 , Ave. Reward: -1.919118523045648 , Episode Length: 244 Max Up-Time: 39\n", + "Episode: 51 Total Steps: 27253 , Ave. Reward: -1.4967671580395374 , Episode Length: 809 Max Up-Time: 46\n", + "Episode: 52 Total Steps: 27729 , Ave. Reward: -2.8176206551741148 , Episode Length: 476 Max Up-Time: 79\n", + "Episode: 53 Total Steps: 28229 , Ave. Reward: -0.6498666207781548 , Episode Length: 500 Max Up-Time: 41\n", + "Episode: 54 Total Steps: 28535 , Ave. Reward: -1.442685133245505 , Episode Length: 306 Max Up-Time: 50\n", + "Episode: 55 Total Steps: 29554 , Ave. Reward: -0.7346716942732042 , Episode Length: 1019 Max Up-Time: 91\n", + "Episode: 56 Total Steps: 30478 , Ave. Reward: -0.2849088403628985 , Episode Length: 924 Max Up-Time: 74\n", + "Episode: 57 Total Steps: 31018 , Ave. Reward: -0.9564715045670377 , Episode Length: 540 Max Up-Time: 37\n", + "Episode: 58 Total Steps: 31702 , Ave. Reward: -0.029077670750031454 , Episode Length: 684 Max Up-Time: 107\n", + "Episode: 59 Total Steps: 32987 , Ave. Reward: -0.7201722007016701 , Episode Length: 1285 Max Up-Time: 61\n", + "Episode: 60 Total Steps: 33662 , Ave. Reward: -1.447542019951657 , Episode Length: 675 Max Up-Time: 36\n", + "Episode: 61 Total Steps: 34853 , Ave. Reward: -0.7092105324653137 , Episode Length: 1191 Max Up-Time: 60\n", + "Episode: 62 Total Steps: 35556 , Ave. 
Reward: -1.008758527335291 , Episode Length: 703 Max Up-Time: 70\n", + "Episode: 63 Total Steps: 37253 , Ave. Reward: -0.11298222499952609 , Episode Length: 1697 Max Up-Time: 71\n", + "Episode: 64 Total Steps: 37607 , Ave. Reward: -2.333472374918777 , Episode Length: 354 Max Up-Time: 42\n", + "Episode: 65 Total Steps: 38769 , Ave. Reward: -0.5435572183910845 , Episode Length: 1162 Max Up-Time: 86\n", + "Episode: 66 Total Steps: 40346 , Ave. Reward: -0.3510286890823708 , Episode Length: 1577 Max Up-Time: 35\n", + "Episode: 67 Total Steps: 41342 , Ave. Reward: -0.43548023889291587 , Episode Length: 996 Max Up-Time: 56\n", + "Episode: 68 Total Steps: 41732 , Ave. Reward: -0.5497570997221486 , Episode Length: 390 Max Up-Time: 23\n", + "Episode: 69 Total Steps: 42909 , Ave. Reward: 0.038723967455411054 , Episode Length: 1177 Max Up-Time: 50\n", + "Episode: 70 Total Steps: 43482 , Ave. Reward: -0.6763002936147164 , Episode Length: 573 Max Up-Time: 44\n", + "Episode: 71 Total Steps: 44648 , Ave. Reward: -0.9062670195421423 , Episode Length: 1166 Max Up-Time: 38\n", + "Episode: 72 Total Steps: 45758 , Ave. Reward: -0.30692741058433193 , Episode Length: 1110 Max Up-Time: 65\n", + "Episode: 73 Total Steps: 45969 , Ave. Reward: -5.942005811194079 , Episode Length: 211 Max Up-Time: 33\n", + "Episode: 74 Total Steps: 46338 , Ave. Reward: -2.397374412919753 , Episode Length: 369 Max Up-Time: 46\n", + "Episode: 75 Total Steps: 46748 , Ave. Reward: -0.8976414519854521 , Episode Length: 410 Max Up-Time: 51\n", + "Episode: 76 Total Steps: 47539 , Ave. Reward: -0.7642109985612977 , Episode Length: 791 Max Up-Time: 36\n", + "Episode: 77 Total Steps: 48602 , Ave. Reward: -0.28253135177080774 , Episode Length: 1063 Max Up-Time: 69\n", + "Episode: 78 Total Steps: 48996 , Ave. Reward: -0.9534163332275297 , Episode Length: 394 Max Up-Time: 20\n", + "Episode: 79 Total Steps: 50055 , Ave. Reward: -0.2423828962026703 , Episode Length: 1059 Max Up-Time: 91\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Episode: 80 Total Steps: 50944 , Ave. Reward: -0.6279331535186513 , Episode Length: 889 Max Up-Time: 52\n", + "Episode: 81 Total Steps: 52280 , Ave. Reward: -0.20073270114025632 , Episode Length: 1336 Max Up-Time: 83\n", + "Episode: 82 Total Steps: 53883 , Ave. Reward: -0.08541144990249938 , Episode Length: 1603 Max Up-Time: 122\n", + "Episode: 83 Total Steps: 54334 , Ave. Reward: -0.34031538317112564 , Episode Length: 451 Max Up-Time: 44\n", + "Episode: 84 Total Steps: 55892 , Ave. Reward: -0.8371231570383377 , Episode Length: 1558 Max Up-Time: 34\n", + "Episode: 85 Total Steps: 57479 , Ave. Reward: -0.699271602876404 , Episode Length: 1587 Max Up-Time: 41\n", + "Episode: 86 Total Steps: 61409 , Ave. Reward: -0.1733200848853596 , Episode Length: 3930 Max Up-Time: 67\n", + "Episode: 87 Total Steps: 63851 , Ave. Reward: -0.08644986908609424 , Episode Length: 2442 Max Up-Time: 81\n", + "Episode: 88 Total Steps: 69570 , Ave. Reward: -0.09404408310442923 , Episode Length: 5719 Max Up-Time: 38\n", + "Episode: 89 Total Steps: 76704 , Ave. Reward: -0.17318490459394822 , Episode Length: 7134 Max Up-Time: 35\n", + "Episode: 90 Total Steps: 90786 , Ave. 
Reward: -0.0956459185629743 , Episode Length: 14082 Max Up-Time: 28\n" + ] + } + ], + "source": [ + "# This is the environment\n", + "env = swingUp.SwingUpEnv()\n", + "\n", + "# For simplicity, we only consider forces of -1 and 1\n", + "numActions = 2\n", + "Actions = np.linspace(-1,1,numActions)\n", + "\n", + "# This is our learning agent\n", + "gamma = .95\n", + "agent = sarsaAgent(5,numActions,20,1,epsilon=5e-2,gamma=gamma,alpha=1e-5)\n", + "\n", + "maxSteps = 2e5\n", + "\n", + "# This is a helper to deal with the fact that x[2] is actually an angle\n", + "x_to_y = lambda x : np.array([x[0],x[1],np.cos(x[2]),np.sin(x[2]),x[3]])\n", + "\n", + "R = []\n", + "UpTime = []\n", + "\n", + "step = 0\n", + "ep = 0\n", + "while step < maxSteps:\n", + " ep += 1\n", + " x = env.reset()\n", + " C = 0.\n", + " \n", + " done = False\n", + " t = 1\n", + " while not done:\n", + " t += 1\n", + " step += 1\n", + " y = x_to_y(x)\n", + " a = agent.action(y)\n", + " u = Actions[a:a+1]\n", + " env.render()\n", + " x_next,c,done,info = env.step(u)\n", + " \n", + " max_up_time = info['max_up_time']\n", + " y_next = x_to_y(x_next)\n", + "\n", + " C += (1./t)*(c-C)\n", + " agent.update(y,a,c,y_next,done)\n", + " x = x_next\n", + " if done:\n", + " break\n", + " \n", + " if step >= maxSteps:\n", + " break\n", + " \n", + " \n", + " R.append(C)\n", + " UpTime.append(max_up_time)\n", + " #print('t:',ep+1,', R:',C,', L:',t-1,', G:',G,', Q:', Q_est, 'U:', max_up_time)\n", + " print('Episode:',ep,'Total Steps:',step,', Ave. Reward:',C,', Episode Length:',t-1, 'Max Up-Time:', max_up_time)\n", + "env.close()\n", + "\n", + "plt.plot(UpTime)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In each episode, the system tracks the \"Maximum Up-Time\", which is the longest stretch of time that the pendulum tip was above the horizontal" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Question\n", + "\n", + "Implement your own controller or modify the code above. Try a different function approximation and see if you can get longer up time, learn more quickly, or more consistently. As with the code above, plot the maximum up time for each episode." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Make your code here." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python [default]", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/System/Pendulum/initialize_system.py b/System/Pendulum/initialize_system.py new file mode 100644 index 0000000..38b7134 --- /dev/null +++ b/System/Pendulum/initialize_system.py @@ -0,0 +1,18 @@ +# This file should be run on system startup. It will initialize the linear position to the center so that all tests originate from a proper position. 
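+# (The centering itself is implemented by System.initialize(); this script just invokes it once at boot.)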
+# The center is found by using the hardware limit switches +from system import System +import RPi.GPIO as GPIO + +# Main program +print("Got to init") +sys = System() +sys.initialize() +GPIO.cleanup() + + +##debug version +#print("alive") +#sys = System() +#limit_negative_pin = 19 +#while(1): +# print(GPIO.input(limit_negative_pin)) \ No newline at end of file diff --git a/System/Pendulum/motor.py b/System/Pendulum/motor.py new file mode 100644 index 0000000..427d393 --- /dev/null +++ b/System/Pendulum/motor.py @@ -0,0 +1,76 @@ +#!/usr/bin/env python + +# Import required modules +import RPi.GPIO as GPIO +from time import sleep + +# Constants: parameters that the caller cannot modify +# Frequency: We have determined that the optimal frequency for our motor is 1kHz +pwm_frequency = 1000 + +# Motor Class +# This controls the motor at the given IO +class Motor: + def __init__(self, speed_pin, forward_pin, reverse_pin): + # Set the board IO (just in case it hasn't been done yet) + GPIO.setmode(GPIO.BCM) + # Set our variables for the directional pins + self.forward_pin = forward_pin + self.reverse_pin = reverse_pin + # setup the IO + GPIO.setup(speed_pin, GPIO.OUT) + GPIO.setup(self.forward_pin, GPIO.OUT) + GPIO.setup(self.reverse_pin, GPIO.OUT) + # Set speed pin as a PWM output + self.speed_pwm = GPIO.PWM(speed_pin, pwm_frequency) + self.current_speed = 0. + # END __init__ + + # Move the motor at a given speed, given as a floating point percentage (-100 <= x <= 100) + # If speed is less than 0, motor will run in reverse, otherwise it will run forward + def move(self, speed): + if speed == self.current_speed: + # do not attempt to readjust speed; this can cause erratic behavior + return + if speed < -100.0 or speed > 100.0: + return + # Stop any previous movements + self.speed_pwm.stop() + # Set the duty cycle for the speed of the motor + self.speed_pwm.ChangeDutyCycle(abs(speed)) + if speed < 0: + # Set direction to reverse + GPIO.output(self.forward_pin, GPIO.LOW) + GPIO.output(self.reverse_pin, GPIO.HIGH) + else: + # Set the direction to forward + GPIO.output(self.forward_pin, GPIO.HIGH) + GPIO.output(self.reverse_pin, GPIO.LOW) + # Start the PWM output to start moving the motor + self.speed_pwm.start(abs(speed)) + self.current_speed = speed + sleep((1./pwm_frequency) * 3) + # END Move + + # Stop the motor from spinning. + # To brake the motor, both direction outputs are set to HIGH + def brake(self): + # Stop any current PWM signals + self.speed_pwm.stop() + # Set the direction outputs to brake + GPIO.output(self.forward_pin, GPIO.HIGH) + GPIO.output(self.reverse_pin, GPIO.HIGH) + self.current_speed = 0. + # END Brake + + # Set the motor to coast (i.e. Do not provide power to the motor, but still allow it to spin) + # To coast the motor, both direction outputs are set to LOW + def coast(self): + # Stop any current PWM signals + self.speed_pwm.stop() + # Set the direction outputs to coast + GPIO.output(self.forward_pin, GPIO.LOW) + GPIO.output(self.reverse_pin, GPIO.LOW) + self.current_speed = 0. 
+ # END Coast + \ No newline at end of file diff --git a/System/Pendulum/results.csv b/System/Pendulum/results.csv new file mode 100644 index 0000000..ca22685 --- /dev/null +++ b/System/Pendulum/results.csv @@ -0,0 +1,252 @@ +this is a test with speed 1 +timestamp,angle(Radians),position(inches),speed(percentage) +21:11:03.518250,3.638602,11.781250,1.000000 +this is a test with speed 2 +timestamp,angle(Radians),position(inches),speed(percentage) +21:11:10.747827,3.141593,0.000000,2.000000 +21:11:10.951560,3.141593,0.000000,2.000000 +21:11:11.155652,3.141593,0.000000,2.000000 +21:11:11.359968,3.141593,0.000000,2.000000 +21:11:11.564011,3.196816,10.648438,2.000000 +this is a test with speed 1 +timestamp,angle(Radians),position(inches),speed(percentage) +21:21:23.365529,3.061826,-12.347656,1.000000 +21:21:23.569516,3.061826,-12.347656,1.000000 +21:21:23.774907,3.061826,-12.291016,1.000000 +21:21:23.978789,3.074097,-12.234375,1.000000 +21:21:24.182513,3.098641,-11.951172,1.000000 +21:21:24.387526,3.172272,-11.328125,1.000000 +21:21:24.593523,3.190680,-10.535156,1.000000 +21:21:24.797276,3.135457,-9.628906,1.000000 +21:21:25.000997,3.092505,-8.666016,1.000000 +21:21:25.204894,3.123185,-7.589844,1.000000 +21:21:25.409312,3.178408,-6.457031,1.000000 +21:21:25.613246,3.178408,-5.494141,1.000000 +21:21:25.816969,3.129321,-4.248047,1.000000 +21:21:26.020770,3.098641,-2.888672,1.000000 +21:21:26.224634,3.117049,-1.642578,1.000000 +21:21:26.428394,3.160000,-0.453125,1.000000 +21:21:26.632123,3.160000,0.623047,1.000000 +21:21:26.836130,3.129321,1.416016,1.000000 +21:21:27.041016,3.123185,2.322266,1.000000 +21:21:27.245016,3.153864,3.398438,1.000000 +21:21:27.448770,3.172272,4.757812,1.000000 +21:21:27.652601,3.147729,6.003906,1.000000 +21:21:27.857510,3.123185,7.250000,1.000000 +21:21:28.061582,3.129321,8.212891,1.000000 +21:21:28.265318,3.153864,9.062500,1.000000 +21:21:28.469456,3.160000,9.798828,1.000000 +21:21:28.674342,3.166136,10.931641,1.000000 +this is a test with speed 2 +timestamp,angle(Radians),position(inches),speed(percentage) +21:21:40.385974,3.123185,-11.328125,2.000000 +21:21:40.589873,3.153864,-11.328125,2.000000 +21:21:40.793628,3.153864,-11.328125,2.000000 +21:21:40.997389,3.123185,-11.271484,2.000000 +21:21:41.201337,3.123185,-11.101562,2.000000 +21:21:41.405074,3.160000,-10.478516,2.000000 +21:21:41.608862,3.172272,-9.685547,2.000000 +21:21:41.812892,3.147729,-8.496094,2.000000 +21:21:42.017833,3.123185,-7.250000,2.000000 +21:21:42.226387,3.123185,-6.117188,2.000000 +21:21:42.430203,3.141593,-5.041016,2.000000 +21:21:42.634253,3.153864,-3.908203,2.000000 +21:21:42.838705,3.147729,-2.605469,2.000000 +21:21:43.042663,3.135457,-1.189453,2.000000 +21:21:43.246393,3.153864,0.396484,2.000000 +21:21:43.450282,3.153864,1.699219,2.000000 +21:21:43.654589,3.147729,3.171875,2.000000 +21:21:43.858462,3.117049,4.701172,2.000000 +21:21:44.062365,3.110913,6.003906,2.000000 +21:21:44.266291,3.147729,7.136719,2.000000 +21:21:44.470675,3.166136,8.156250,2.000000 +21:21:44.675230,3.160000,9.289062,2.000000 +21:21:44.879104,3.123185,10.308594,2.000000 +this is a test with speed 3 +timestamp,angle(Radians),position(inches),speed(percentage) +21:21:56.181311,3.129321,-11.667969,3.000000 +21:21:56.385221,3.135457,-11.667969,3.000000 +21:21:56.588969,3.135457,-11.611328,3.000000 +21:21:56.792985,3.147729,-11.328125,3.000000 +21:21:56.997731,3.172272,-10.705078,3.000000 +21:21:57.201605,3.166136,-9.515625,3.000000 +21:21:57.405341,3.153864,-7.986328,3.000000 +21:21:57.609332,3.129321,-6.400391,3.000000 
+21:21:57.813877,3.117049,-4.984375,3.000000 +21:21:58.017821,3.141593,-3.455078,3.000000 +21:21:58.221548,3.153864,-1.585938,3.000000 +21:21:58.425285,3.153864,0.113281,3.000000 +21:21:58.629381,3.141593,1.642578,3.000000 +21:21:58.833311,3.141593,3.341797,3.000000 +21:21:59.037050,3.123185,5.097656,3.000000 +21:21:59.240776,3.123185,6.740234,3.000000 +21:21:59.445332,3.147729,8.156250,3.000000 +21:21:59.649453,3.153864,9.515625,3.000000 +21:21:59.853174,3.141593,10.875000,3.000000 +this is a test with speed 4 +timestamp,angle(Radians),position(inches),speed(percentage) +21:22:10.551392,3.074097,-11.667969,4.000000 +21:22:10.755403,3.166136,-11.611328,4.000000 +21:22:10.959199,3.209088,-11.611328,4.000000 +21:22:11.162979,3.141593,-11.611328,4.000000 +21:22:11.367035,3.067962,-11.667969,4.000000 +21:22:11.571220,3.092505,-11.667969,4.000000 +21:22:11.775098,3.172272,-26.167969,4.000000 +21:22:11.978903,3.190680,-26.111328,4.000000 +21:22:12.182761,3.129321,-26.167969,4.000000 +this is a test with speed 1 +timestamp,angle(Radians),position(inches),speed(percentage) +21:24:03.238594,3.049554,-10.988281,1.000000 +21:24:03.443363,3.319534,-11.384766,1.000000 +21:24:03.647160,3.460661,-11.384766,1.000000 +21:24:03.850915,3.252039,-11.328125,1.000000 +21:24:04.055110,3.055690,-10.421875,1.000000 +21:24:04.259089,3.147729,-8.439453,1.000000 +21:24:04.462815,3.294991,-7.080078,1.000000 +21:24:04.666720,3.294991,-6.287109,1.000000 +21:24:04.871045,3.209088,-5.210938,1.000000 +21:24:05.075400,3.166136,-3.738281,1.000000 +21:24:05.279280,3.215224,-1.812500,1.000000 +21:24:05.483051,3.270447,-0.056641,1.000000 +21:24:05.687202,3.258175,1.472656,1.000000 +21:24:05.891495,3.196816,3.171875,1.000000 +21:24:06.095450,3.172272,5.154297,1.000000 +21:24:06.299177,3.202952,6.853516,1.000000 +21:24:06.503022,3.227496,7.986328,1.000000 +21:24:06.707304,3.221360,8.779297,1.000000 +21:24:06.911174,3.209088,9.515625,1.000000 +21:24:07.115038,3.227496,10.535156,1.000000 +this is a test with speed 2 +timestamp,angle(Radians),position(inches),speed(percentage) +21:24:17.195756,3.288855,-26.621094,2.000000 +21:24:17.400051,3.233632,-26.621094,2.000000 +21:24:17.603796,3.160000,-26.564453,2.000000 +21:24:17.807579,3.258175,-25.261719,2.000000 +21:24:18.011603,3.313399,-23.505859,2.000000 +21:24:18.216375,3.202952,-22.373047,2.000000 +21:24:18.422944,3.086369,-21.523438,2.000000 +21:24:18.627284,3.166136,-20.900391,2.000000 +21:24:18.831221,3.331806,-19.937500,2.000000 +21:24:19.035149,3.362486,-18.521484,2.000000 +21:24:19.238927,3.196816,-16.708984,2.000000 +21:24:19.443200,3.067962,-14.953125,2.000000 +21:24:19.647538,3.153864,-13.650391,2.000000 +21:24:19.852128,3.319534,-12.404297,2.000000 +21:24:20.055988,3.325670,-11.214844,2.000000 +21:24:20.259682,3.209088,-9.572266,2.000000 +21:24:20.463588,3.110913,-7.873047,2.000000 +21:24:20.667910,3.160000,-6.626953,2.000000 +21:24:20.871916,3.270447,-5.607422,2.000000 +21:24:21.075676,3.301127,-4.474609,2.000000 +21:24:21.279456,3.209088,-3.341797,2.000000 +21:24:21.483292,3.129321,-2.208984,2.000000 +21:24:21.687757,3.123185,-2.039062,2.000000 +21:24:21.891745,3.245903,-2.039062,2.000000 +21:24:22.095769,3.325670,-2.039062,2.000000 +21:24:22.299703,3.258175,-2.095703,2.000000 +21:24:22.503702,3.202952,-2.095703,2.000000 +21:24:22.707592,3.270447,-2.095703,2.000000 +21:24:22.911564,3.276583,-2.208984,2.000000 +21:24:23.115698,3.264311,-3.681641,2.000000 +21:24:23.320113,3.307263,-6.060547,2.000000 +21:24:23.523990,3.368622,-8.835938,2.000000 
+21:24:23.727764,3.423845,-12.177734,2.000000 +21:24:23.931581,3.479068,-14.896484,2.000000 +21:24:24.135508,3.491340,-17.162109,2.000000 +21:24:24.339394,3.331806,-17.445312,2.000000 +21:24:24.543121,3.184544,-17.388672,2.000000 +21:24:24.747104,3.141593,-17.388672,2.000000 +21:24:24.951503,3.209088,-17.388672,2.000000 +21:24:25.155448,3.282719,-17.388672,2.000000 +21:24:25.359181,3.258175,-17.388672,2.000000 +21:24:25.563220,3.184544,-17.445312,2.000000 +21:24:25.767602,3.160000,-17.388672,2.000000 +21:24:25.971902,3.221360,-17.388672,2.000000 +21:24:26.175822,3.270447,-17.388672,2.000000 +21:24:26.379473,3.239767,-17.445312,2.000000 +21:24:26.583204,3.184544,-17.388672,2.000000 +21:24:26.787312,3.178408,-17.388672,2.000000 +21:24:26.991191,3.227496,-17.388672,2.000000 +21:24:27.195040,3.258175,-17.388672,2.000000 +21:24:27.398920,3.233632,-17.445312,2.000000 +21:24:27.602646,3.190680,-17.388672,2.000000 +21:24:27.806773,3.196816,-17.445312,2.000000 +21:24:28.010824,3.227496,-17.445312,2.000000 +21:24:28.214705,3.239767,-17.388672,2.000000 +21:24:28.418434,3.215224,-17.445312,2.000000 +21:24:28.622173,3.196816,-17.445312,2.000000 +21:24:28.827388,3.209088,-17.445312,2.000000 +21:24:29.031949,3.227496,-17.388672,2.000000 +21:24:29.235813,3.227496,-17.388672,2.000000 +21:24:29.439567,3.215224,-17.388672,2.000000 +21:24:29.643433,3.209088,-17.445312,2.000000 +21:24:29.847229,3.221360,-17.388672,2.000000 +21:24:30.051008,3.221360,-17.388672,2.000000 +21:24:30.254897,3.221360,-17.445312,2.000000 +21:24:30.460323,3.215224,-17.445312,2.000000 +21:24:30.664998,3.215224,-17.445312,2.000000 +21:24:30.868735,3.215224,-17.388672,2.000000 +21:24:31.072555,3.221360,-17.388672,2.000000 +21:24:31.278097,3.221360,-17.388672,2.000000 +21:24:31.482317,3.215224,-17.388672,2.000000 +21:24:31.686053,3.221360,-17.388672,2.000000 +21:24:31.889814,3.215224,-17.445312,2.000000 +21:24:32.094156,3.215224,-17.445312,2.000000 +21:24:32.298057,3.215224,-17.445312,2.000000 +21:24:32.501811,3.215224,-17.445312,2.000000 +21:24:32.705567,3.209088,-17.558594,2.000000 +21:24:32.909636,3.123185,-19.031250,2.000000 +21:24:33.113681,3.129321,-20.957031,2.000000 +21:24:33.317471,3.258175,-22.882812,2.000000 +21:24:33.521209,3.331806,-24.808594,2.000000 +21:24:33.725343,3.270447,-26.224609,2.000000 +21:24:33.929347,3.135457,-27.414062,2.000000 +21:24:34.133069,3.141593,-28.263672,2.000000 +21:24:34.336813,3.270447,-28.830078,2.000000 +21:24:34.540822,3.350214,-28.773438,2.000000 +21:24:34.745675,3.368622,-27.074219,2.000000 +21:24:34.949791,3.233632,-24.242188,2.000000 +21:24:35.153530,3.061826,-21.353516,2.000000 +21:24:35.357278,3.037282,-19.824219,2.000000 +21:24:35.561108,3.209088,-18.748047,2.000000 +21:24:35.765374,3.362486,-17.785156,2.000000 +21:24:35.969290,3.301127,-16.935547,2.000000 +21:24:36.173039,3.080233,-16.539062,2.000000 +21:24:36.377081,3.049554,-16.425781,2.000000 +21:24:36.580936,3.239767,-16.425781,2.000000 +21:24:36.785009,3.380894,-16.425781,2.000000 +21:24:36.988996,3.294991,-16.482422,2.000000 +21:24:37.192991,3.104777,-16.425781,2.000000 +21:24:37.396751,3.074097,-16.425781,2.000000 +21:24:37.600523,3.239767,-16.425781,2.000000 +21:24:37.804401,3.362486,-16.425781,2.000000 +21:24:38.008162,3.276583,-16.425781,2.000000 +21:24:38.212000,3.110913,-16.425781,2.000000 +21:24:38.416104,3.098641,-16.425781,2.000000 +21:24:38.619845,3.252039,-16.425781,2.000000 +21:24:38.823609,3.344078,-16.425781,2.000000 +21:24:39.027425,3.258175,-16.425781,2.000000 +21:24:39.231822,3.117049,-16.425781,2.000000 
+21:24:39.435720,3.117049,-16.425781,2.000000 +this is a test with speed 1 +timestamp,angle(Radians),position(inches),speed(percentage) +21:26:03.992735,3.153864,-11.611328,1.000000 +21:26:04.196730,3.086369,-11.611328,1.000000 +21:26:04.400491,3.055690,-10.818359,1.000000 +21:26:04.604225,3.055690,-9.968750,1.000000 +21:26:04.807985,3.086369,-9.119141,1.000000 +21:26:05.011982,3.110913,-8.269531,1.000000 +21:26:05.215941,3.080233,-7.419922,1.000000 +21:26:05.419661,3.043418,-6.683594,1.000000 +21:26:05.623503,3.055690,-6.003906,1.000000 +21:26:05.827343,3.104777,-5.097656,1.000000 +21:26:06.032396,3.123185,-4.021484,1.000000 +21:26:06.236582,3.086369,-2.548828,1.000000 +21:26:06.440336,3.067962,-0.849609,1.000000 +21:26:06.644314,3.098641,1.019531,1.000000 +21:26:06.848124,3.110913,2.775391,1.000000 +21:26:07.051824,3.067962,4.701172,1.000000 +21:26:07.256424,3.055690,6.853516,1.000000 +21:26:07.460299,3.049554,8.269531,1.000000 +21:26:07.664164,3.055690,9.232422,1.000000 +21:26:07.867919,3.086369,10.195312,1.000000 diff --git a/System/Pendulum/swingUp.py b/System/Pendulum/swingUp.py new file mode 100644 index 0000000..fb5d4c0 --- /dev/null +++ b/System/Pendulum/swingUp.py @@ -0,0 +1,207 @@ +""" +Classic cart-pole system implemented by Rich Sutton et al. +Copied from http://incompleteideas.net/sutton/book/code/pole.c +permalink: https://perma.cc/C9ZM-652R +""" + +import math +import gym +from gym import spaces, logger +from gym.utils import seeding +import numpy as np + +class SwingUpEnv(gym.Env): + """ + Description: + A pole is attached by an un-actuated joint to a cart, which moves along a frictionless track. The pendulum starts upright, and the goal is to prevent it from falling over by increasing and reducing the cart's velocity. + + Source: + This environment corresponds to the version of the cart-pole problem described by Barto, Sutton, and Anderson + + Observation: + Type: Box(4) + Num Observation Min Max + 0 Cart Position -4.8 4.8 + 1 Cart Velocity -Inf Inf + 2 Pole Angle -Inf Inf + 3 Pole Velocity At Tip -Inf Inf + + Actions: + Type: Box(1) + Num Action Min Max + 0 Push cart -1 1 + + Note: The amount the velocity that is reduced or increased is not fixed; it depends on the angle the pole is pointing. This is because the center of gravity of the pole increases the amount of energy needed to move the cart underneath it + + Reward: + Reward is 1 for every step taken, including the termination step + + Starting State: + All observations are assigned a uniform random value in [-0.05..0.05] + + Episode Termination: + Pole Angle is more than 12 degrees + Cart Position is more than 2.4 (center of the cart reaches the edge of the display) + Episode length is greater than 200 + Solved Requirements + Considered solved when the average reward is greater than or equal to 195.0 over 100 consecutive trials. + """ + + metadata = { + 'render.modes': ['human', 'rgb_array'], + 'video.frames_per_second' : 50 + } + + def __init__(self): + self.gravity = 9.8 + self.masscart = 1.0 + self.masspole = 0.1 + self.total_mass = (self.masspole + self.masscart) + self.length = 0.5 # actually half the pole's length + self.polemass_length = (self.masspole * self.length) + self.force_mag = 10.0 + self.tau = 0.02 # seconds between state updates + self.kinematics_integrator = 'euler' + + # Angle at which to fail the episode + self.x_threshold = 2.4 + self.x_dot_threshold = 10. 
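+        # (Note: x_threshold and theta_dot_threshold end the episode in step() below;
+        # x_dot_threshold is only used to bound the observation space.)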
+ self.theta_dot_threshold = 3*np.pi + + # Angle limit set to 2 * theta_threshold_radians so failing observation is still within bounds + high = np.array([ + self.x_threshold*2, + self.x_dot_threshold, + np.finfo(np.float32).max, + np.finfo(np.float32).max]) + + + self.action_space = spaces.Box(-np.ones(1),np.ones(1),dtype=np.float32) + self.observation_space = spaces.Box(-high, high, dtype=np.float32) + + self.seed() + self.viewer = None + self.state = None + + self.steps_beyond_done = None + + def seed(self, seed=None): + self.np_random, seed = seeding.np_random(seed) + return [seed] + + def step(self, action): + assert self.action_space.contains(action), "%r (%s) invalid"%(action, type(action)) + state = self.state + x, x_dot, theta, theta_dot = state + force = self.force_mag * action[0] + + costheta = math.cos(theta) + sintheta = math.sin(theta) + + if costheta > 0: + self.up_time += 1 + self.max_up_time = np.max([self.up_time,self.max_up_time]) + + else: + self.up_time = 0 + + temp = (force + self.polemass_length * theta_dot * theta_dot * sintheta) / self.total_mass + thetaacc = (self.gravity * sintheta - costheta* temp) / (self.length * (4.0/3.0 - self.masspole * costheta * costheta / self.total_mass)) + xacc = temp - self.polemass_length * thetaacc * costheta / self.total_mass + if self.kinematics_integrator == 'euler': + x = x + self.tau * x_dot + x_dot = x_dot + self.tau * xacc + theta = theta + self.tau * theta_dot + theta_dot = theta_dot + self.tau * thetaacc + else: # semi-implicit euler + x_dot = x_dot + self.tau * xacc + x = x + self.tau * x_dot + theta_dot = theta_dot + self.tau * thetaacc + theta = theta + self.tau * theta_dot + self.state = (x,x_dot,theta,theta_dot) + done = x < -self.x_threshold \ + or x > self.x_threshold \ + or theta_dot < -self.theta_dot_threshold \ + or theta_dot > self.theta_dot_threshold + done = bool(done) + + if not done: + reward = np.ceil(costheta) + elif self.steps_beyond_done is None: + # Pole just fell! + self.steps_beyond_done = 0 + reward = -(100 * (np.abs(x_dot)+np.abs(theta_dot))) + else: + if self.steps_beyond_done == 0: + logger.warn("You are calling 'step()' even though this environment has already returned done = True. 
You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.") + self.steps_beyond_done += 1 + reward = 0.0 + + return np.array(self.state), reward, done, {'max_up_time' : self.max_up_time} + + def reset(self): + self.state = self.np_random.uniform(low=-.5,high=.5,size=(4,)) + self.state[2] += np.pi + self.up_time = 0 + self.max_up_time = 0 + self.up = False + self.steps_beyond_done = None + return np.array(self.state) + + def render(self, mode='human'): + screen_width = 600 + screen_height = 400 + + world_width = self.x_threshold*2 + scale = screen_width/world_width + carty = 100 # TOP OF CART + polewidth = 10.0 + polelen = scale * (2 * self.length) + cartwidth = 50.0 + cartheight = 30.0 + + if self.viewer is None: + from gym.envs.classic_control import rendering + self.viewer = rendering.Viewer(screen_width, screen_height) + l,r,t,b = -cartwidth/2, cartwidth/2, cartheight/2, -cartheight/2 + axleoffset =cartheight/4.0 + cart = rendering.FilledPolygon([(l,b), (l,t), (r,t), (r,b)]) + self.carttrans = rendering.Transform() + cart.add_attr(self.carttrans) + self.viewer.add_geom(cart) + l,r,t,b = -polewidth/2,polewidth/2,polelen-polewidth/2,-polewidth/2 + pole = rendering.FilledPolygon([(l,b), (l,t), (r,t), (r,b)]) + pole.set_color(.8,.6,.4) + self.poletrans = rendering.Transform(translation=(0, axleoffset)) + pole.add_attr(self.poletrans) + pole.add_attr(self.carttrans) + self.viewer.add_geom(pole) + self.axle = rendering.make_circle(polewidth/2) + self.axle.add_attr(self.poletrans) + self.axle.add_attr(self.carttrans) + self.axle.set_color(.5,.5,.8) + self.viewer.add_geom(self.axle) + self.track = rendering.Line((0,carty), (screen_width,carty)) + self.track.set_color(0,0,0) + self.viewer.add_geom(self.track) + + self._pole_geom = pole + + if self.state is None: return None + + # Edit the pole polygon vertex + pole = self._pole_geom + l,r,t,b = -polewidth/2,polewidth/2,polelen-polewidth/2,-polewidth/2 + pole.v = [(l,b), (l,t), (r,t), (r,b)] + + x = self.state + cartx = x[0]*scale+screen_width/2.0 # MIDDLE OF CART + self.carttrans.set_translation(cartx, carty) + self.poletrans.set_rotation(-x[2]) + + return self.viewer.render(return_rgb_array = mode=='rgb_array') + + def close(self): + if self.viewer: + self.viewer.close() + self.viewer = None diff --git a/System/Pendulum/system.py b/System/Pendulum/system.py new file mode 100644 index 0000000..21439aa --- /dev/null +++ b/System/Pendulum/system.py @@ -0,0 +1,373 @@ +#!/usr/bin/env python +from .motor import Motor +from .encoder import Encoder +import math +from datetime import datetime +from time import sleep +import RPi.GPIO as GPIO +import sys +# import os +count2 = 0 +## +import cmath +## + + + + +from threading import Thread, Lock + +# IO pin definitions +### Motor pins +motor_speed_pin = 17 +motor_forward_pin = 27 +motor_reverse_pin = 22 +### Encoder pins (shared by both encoders) +encoder_clock_pin = 3 +encoder_data_pin = 2 +### Angular encoder pins +encoder_angular_cs_pin = 4 +### Linear encoder pins +encoder_linear_cs_pin = 23 +### Limit switch pins (configured to PULLUP) + +#FLIPPING THESE BELOW +#limit_negative_pin = 19 +#limit_positive_pin = 26 +limit_negative_pin = 26 +limit_positive_pin = 19 + +# System parameters +system_max_x = 16.5 +system_min_x = -16.5 +downloads_reference_dest = "." +default_results_fileName = "results" + + +# System Class +# This is the primary interface a student will use to control the pendulum. 
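+#
+# A minimal usage sketch (illustration only; assumes the wiring matches the pin constants
+# above and that the cart starts roughly centered):
+#     pendulum = System(angular_units='Radians')
+#     angle, position = pendulum.measure()       # (angle, linear position)
+#     pendulum.adjust(20)                        # drive the cart at 20% speed
+#     pendulum.add_results(angle, position, 20)  # append a row to the results CSV
+#     pendulum.deinitialize()                    # return home and release the GPIO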
+class System: + def __init__(self, negative_limit=float('nan'), positive_limit=float('nan'), angular_units='Degrees', sw_limit_routine=None): + GPIO.setwarnings(False) + self.deinit = False + # Initialize public variables + self.max_x = system_max_x + self.min_x = system_min_x + # Initialize the motor. + self.motor = Motor(motor_speed_pin, motor_forward_pin, motor_reverse_pin) + # Initialize the angular encoder. + self.encoder_angular = Encoder(encoder_clock_pin, encoder_angular_cs_pin, encoder_data_pin) + self.encoder_angular.set_zero(offset = 512) # set offset so that 0 is upright vertical + # Initialize the linear encoder. + self.encoder_linear = Linear_Encoder(encoder_clock_pin, encoder_linear_cs_pin, encoder_data_pin) + # We assume that the system has been initialized on startup to a 0 position, or that the previous run ended by returning the system to 0 + self.encoder_linear.set_zero() + + self.angular_units = angular_units + + # Enable hardware interrupts for hardware limit switches + GPIO.setup(limit_negative_pin, GPIO.IN, pull_up_down=GPIO.PUD_UP) + GPIO.add_event_detect(limit_negative_pin, GPIO.FALLING, callback=self.negative_limit_callback) + GPIO.setup(limit_positive_pin, GPIO.IN, pull_up_down=GPIO.PUD_UP) + GPIO.add_event_detect(limit_positive_pin, GPIO.FALLING, callback=self.positive_limit_callback) + self.interrupted = False + + # Setup soft limits if defined by the user (this is "challenge mode" for the user, making the constraints more difficult). + # By default, the soft limits will not be used (when set NaN), and the whole extent of the system is available (to the HW limits). + self.negative_soft_limit = negative_limit + self.positive_soft_limit = positive_limit + # If both limits have been defined, verify that they are valid (i.e. positive limit must be greater than the negative limit) + if not math.isnan(negative_limit) and not math.isnan(positive_limit) and not negative_limit < positive_limit: + print("ERROR: Invalid software limits provided. Must be valid floating-point numbers and positive limit must be greater than negative limit. Software limits will be disabled.") + self.negative_soft_limit = float('nan') + self.positive_soft_limit = float('nan') + # NOTE: If only one limit has been defined, this should always work (hardware limits will be the absolute edge on the undefined side, although this would be difficult for users to utilize unless we provide the position of the hardware limits on each side + # NOTE: If neither limit is defined, the hardware limits will be the only limits in effect. + self.sw_limit_routine = self.limit_triggered + if sw_limit_routine is not None: + self.sw_limit_routine = sw_limit_routine + + # Create and setup results file (to be sent back to the server and displayed/downloaded to the user) + # Results file is a CSV with the following entries: angle, position, speed + self.result_filename = f"{sys.argv[0].split('.')[0]}.csv" + print("self.result_filename") + print(self.result_filename) + # Open the file for write mode. The file will get created, assuming it does not already exist. + result_file = open(self.result_filename, "x") + result_file.write(f"timestamp,angle({angular_units}),position(inches),speed(percentage)\n") + result_file.close() + + # Setup a thread to constantly be measuring encoder positions + #self.encoder_thread = EncoderThread(instance = self) + self.encoder_thread = Thread(target = self.encoder_thread_routine) + self.encoder_thread.setDaemon(True) + self.angular_position = 0. + self.linear_position = 0. 
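+        # (angular_position and linear_position are cached here and refreshed continuously by
+        # encoder_thread_routine, started just below; measure() simply returns the latest cached pair.)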
+ self.encoder_thread.start() + # END __init__() + + # Destructor + # Brake the motor and call GPIO.cleanup as a last-chance of doing so + def __del__(self): + self.motor.brake() + GPIO.cleanup() + # END __del__() + + def initialize(self): + print("begin initialize") + # Temporarily disable the limit switch interrupts: we do not want the program to exit if the switch is triggered + GPIO.remove_event_detect(limit_negative_pin) + GPIO.remove_event_detect(limit_positive_pin) + # Begin moving slowly in the negative direction until the negative limit switch is triggered + if not GPIO.input(limit_negative_pin) == False: + self.motor.move(-5) + pressed = True + while pressed != False: + pressed = GPIO.input(limit_negative_pin) + #print(pressed) + sleep(0.01) + self.motor.brake() + print("hit negative end stop") + # Set zero at the negative end of the track for easy reference in determining the extent + self.encoder_linear.set_zero() + sleep(1) + # Begin moving slowly in the positive direction until the positive limit switch is triggered + self.motor.move(5) + pressed = True + while pressed != False: + # We must continue reading linear encoder motion to keep track of rotations + pressed = GPIO.input(limit_positive_pin) + sleep(0.01) + self.motor.brake() + print("hit positive endstop") + # Get the current position (the extent of the track) + extent = self.linear_position + # Move back towards the center until we reach position extent/2 + position = extent + sleep(1) + self.motor.move(-4) + while position >= (extent / 2.): + position = self.linear_position + #print(position) + sleep(0.015) + self.motor.brake() + # Set zero again: this is the real zero + self.encoder_linear.set_zero() + # Re-enable the limit switch interrupts + GPIO.add_event_detect(limit_negative_pin, GPIO.FALLING, callback=self.negative_limit_callback, bouncetime=300) + GPIO.add_event_detect(limit_positive_pin, GPIO.FALLING, callback=self.positive_limit_callback, bouncetime=300) + print("Finished the initialize func") + # END initialize + + # Return home, cleanup IO. This should be called when exiting the program + def deinitialize(self): + self.return_home() + self.motor.brake() + self.deinit = True + if self.encoder_thread.is_alive(): + self.encoder_thread.join() + sleep(1) + GPIO.cleanup() + + # Get the values of the encoders to determine the angular and linear position of the pendulum. + # Values are returned as a tuple: (angle, linear). + ### angle: 0 indicates the pendulum is exactly straight up. + ##### 180 or -180 indicate the pendulum is exactly straight down. + ##### Positive values indicate the pendulum is leaning to the right. + ##### Negative values indicate the pendulum is leaning to the left. + ### linear: 0 indicates the pendulum is exactly in the middle of the track. + ##### Positive values indicate the pendulum is right-of-center. + ##### Negative values indicate the pendulum is left-of-center. + def measure(self): + return (self.angular_position, self.linear_position) + # END measure() + + + # Thread routine (0.1s interval). Get the values of the encoders to determine the angular and linear position of the pendulum. 
+ # Values are saved in the class (self.angular_position and self.linear_position), which are then simply returned by measure() + def encoder_thread_routine(self): + limit_serviced = False + while self.deinit == False: + angular_position = self.encoder_angular.read_position(self.angular_units) + if self.angular_units == 'Degrees': + if angular_position > 180.: + angular_position = angular_position - 360. + self.angular_position = angular_position + self.linear_position = self.encoder_linear.read_position() + # Check soft limits + if (not math.isnan(self.negative_soft_limit)) and self.linear_position < self.negative_soft_limit: #or self.linear_position < self.min_x: + if limit_serviced == False: + limit_serviced = True + # SW limit reached: stop the motor, set the interrupted flag so that the motor cannot continue to move until the interrupt has been completely serviced + #self.interrupted = True + self.motor.brake() + # Print negative soft limit violation to the results file. + result_file = open(self.result_filename, "a") + result_file.write("Negative software limit %f has been reached!\n" % self.negative_soft_limit) + result_file.close() + # Fire the limit trigger method + self.sw_limit_routine() + elif (not math.isnan(self.positive_soft_limit)) and self.linear_position > self.positive_soft_limit: #or self.linear_position > self.max_x: + if limit_serviced == False: + limit_serviced = True + # SW limit reached: stop the motor, set the interrupted flag so that the motor cannot continue to move until the interrupt has been completely serviced + #self.interrupted = True + self.motor.brake() + # Print positive soft limit violation to the results file. + result_file = open(self.result_filename, "a") + result_file.write("Positive software limit %f has been reached!\n" % self.positive_soft_limit) + result_file.close() + # Fire the limit trigger method + self.sw_limit_routine() + elif limit_serviced == True and self.linear_position > (self.negative_soft_limit+0.5) and self.linear_position < (self.positive_soft_limit-0.5): + # Clear the limit service flag once we return to a reasonable range that the limit will not trigger again + limit_serviced = False + # This thread should run on ~0.01s intervals + sleep(0.01) + + # Adjust the pendulum's linear position using the motor. + ### speed: Acceptable values range from -100 to 100 (as a percentage), with 100/-100 being the maximum adjustment speed. + ##### Negative values will move the pendulum to the left. + ##### Positive values will move the pendulum to the right. + def adjust(self, speed): + if self.interrupted == False: + if speed != 0: + # cap the speed inputs + if speed > 100.: + speed = 100. + if speed < -100.: + speed = -100. + # change the motor speed + # TODO: Make sure the motor is oriented so that positive speed the correct direction (same for negative). Change the values otherwise. 
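+                # (One way to check: a small positive speed here should make the linear position
+                # reported by measure() increase; if it decreases, the forward/reverse pin
+                # assignments at the top of this file are likely swapped.)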
+ self.motor.coast() + self.motor.move(speed) + else: + self.motor.coast() + # END adjust() + + # Append data to the results file + def add_results(self, angle, position, speed): + # open the results file + result_file = open(self.result_filename, "a") + # Write the results + result_file.write("%s," % datetime.now().strftime("%H:%M:%S.%f")) # Write current time + result_file.write("%f," % angle) # Write angle + result_file.write("%f," % position) # Write position + result_file.write("%f\n" % speed) # Write speed (end of line) + # Close the results file + result_file.close() + # END add_results + + def add_log(self, message): + # open the results file + result_file = open(self.result_filename, "a") + # Write the log + result_file.write("%s\n" % message) + # re-write the csv headers for next logging + result_file.write(f"timestamp,angle({self.angular_units}),position(inches),speed(percentage)\n") + # Close the results file + result_file.close() + + # Go back to the zero position (linear) so that the next execution starts in the correct place. + def return_home(self): + position = self.linear_position + # slowly move towards 0 until we get there + if position > 0: + self.motor.move(-4) + while position > 0: + position = self.linear_position + sleep(0.015) + self.motor.brake() + return + else: + self.motor.move(4) + while position < 0: + position = self.linear_position + sleep(0.015) + self.motor.brake() + return + # END return_home + + # Callback for when negative limit switch is triggered. + def negative_limit_callback(self, channel): + self.interrupted = True + self.motor.brake() + # Print negative limit trigger to the results file. + result_file = open(self.result_filename, "a") + result_file.write("Negative hardware limit has been reached!\n") + result_file.close() + # Fire the limit trigger method (stops motor, kills program immediately). + self.limit_triggered() + # END negative_limit_callback + # Callback for when positive limit switch is triggered. + def positive_limit_callback(self, channel): + self.interrupted = True + self.motor.brake() + # Print positive limit trigger to the results file. + result_file = open(self.result_filename, "a") + result_file.write("Positive hardware limit has been reached!\n") + result_file.close() + # Fire the limit trigger method (stops motor, kills program immediately). + self.limit_triggered() + # END positive_limit_callback + def limit_triggered(self): + sleep(1) + self.deinitialize() + sys.exit(1) +# END System + +# Linear Encoder class +# This class is to help with using an absolute encoder for linear position sensing as assembled in the physical system. +# The function definitions here are the same as with the regular encoder (pseudo-interface). +class Linear_Encoder: + PROPORTION = 14.5 + + def __init__(self, clk_pin, cs_pin, data_pin): + self.encoder = Encoder(clk_pin, cs_pin, data_pin) + self.set_zero() + def set_zero(self): + # Set the zero position for the encoder + self.encoder.set_zero() + # Reset the internal position counter + self.rotations = 0. + self.last_position = 0. + ###sam debug + self.last_position_filter = 0. 
+ ###sam debug + def read_position(self): + ###sam debug + global count2 + test = 0 + count = 0 + while test == 0: + count += 1 + position = float(self.encoder.read_position('Raw') & 0b1111111100) + complex_current = cmath.exp(((position*2*math.pi)/1023)*1j) + complex_last = cmath.exp(((self.last_position*2*math.pi)/1023)*1j) + distance = math.sqrt((complex_current.real-complex_last.real)**2 + (complex_current.imag-complex_last.imag)**2) + if distance < 0.5 or count > 5: #this corresponds to a difference of 50 in the raw encoder position + test = 1 + #print(count) + count2 = count2 + count - 1 + print("global count") + print(count2) + ###sam debug + + # Read the position of the encoder (apply a noise filter, we don't need that much precision here) + #position = float(self.encoder.read_position('Raw') & 0b1111111100) + + + # Compare to last known position + # NOTE: For now, assume that we are moving the smallest possible distance (i.e. 5 -> 1 is -4, not 1020) + if (position - self.last_position) > 768.: + self.rotations = self.rotations - 1. + elif (position - self.last_position) < -768.: + self.rotations = self.rotations + 1. + + # Save the last position for the next calculation + self.last_position = position + # compute the position based on the system parameters + # linear position = (2pi*r)(n) + (2pi*r)(position/1024) = (2pi*r)(n + position/1024) = (pi*d)(n + position/1024) + print("sled position in inches") + print(self.PROPORTION*(self.rotations + position/1024.)) + return((self.PROPORTION)*(self.rotations + position/1024.)) diff --git a/System/Pendulum/system_swingup_test.py b/System/Pendulum/system_swingup_test.py new file mode 100644 index 0000000..e13c7ca --- /dev/null +++ b/System/Pendulum/system_swingup_test.py @@ -0,0 +1,293 @@ +import numpy as np +import numpy.random as rnd +import torch as pt + +import math +from gym import spaces, logger +from gym.utils import seeding + +from system import System +import time +from sys import exit + +class SwingUpEnv(): + """ + Description: + A pole is attached by an un-actuated joint to a cart, which moves along a frictionless track. The pendulum starts upright, and the goal is to prevent it from falling over by increasing and reducing the cart's velocity. + + Source: + This environment corresponds to the version of the cart-pole problem described by Barto, Sutton, and Anderson + + Observation: + Type: Box(4) + Num Observation Min Max + 0 Cart Position -4.8 4.8 + 1 Cart Velocity -Inf Inf + 2 Pole Angle -Inf Inf + 3 Pole Velocity At Tip -Inf Inf + + Actions: + Type: Box(1) + Num Action Min Max + 0 Push cart -1 1 + + Note: The amount the velocity that is reduced or increased is not fixed; it depends on the angle the pole is pointing. This is because the center of gravity of the pole increases the amount of energy needed to move the cart underneath it + + Reward: + Reward is 1 for every step taken, including the termination step + + Starting State: + All observations are assigned a uniform random value in [-0.05..0.05] + + Episode Termination: + Pole Angle is more than 12 degrees + Cart Position is more than 2.4 (center of the cart reaches the edge of the display) + Episode length is greater than 200 + Solved Requirements + Considered solved when the average reward is greater than or equal to 195.0 over 100 consecutive trials. + """ + + metadata = { + 'render.modes': ['human', 'rgb_array'], + 'video.frames_per_second' : 50 + } + + def __init__(self): + self.sys = System(angular_units='Radians') + + self.force_mag = 10. 
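# --- Editor's sketch (not part of the patch) ---
# A standalone restatement of the glitch filter used in read_position(): two
# raw 10-bit readings are mapped onto the unit circle and compared by chord
# length, which is wrap-around safe (readings of 1020 and 4 come out close
# together rather than ~1016 counts apart). A chord below 0.5 means the two
# readings differ by a small fraction of one revolution.
import cmath
import math

def chord_distance(raw_a, raw_b, counts_per_rev=1024):
    za = cmath.exp(2j * math.pi * raw_a / counts_per_rev)
    zb = cmath.exp(2j * math.pi * raw_b / counts_per_rev)
    return abs(za - zb)

print(chord_distance(1020, 4))   # small: the readings are really ~8 counts apart
print(chord_distance(0, 512))    # 2.0: opposite sides of the revolution
# --- end sketch ---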
+ self.last_time = time.time() # time for seconds between updates + + # Angle at which to fail the episode + self.x_threshold = 10. + self.x_dot_threshold = 10. + self.theta_dot_threshold = 3*np.pi + + # Angle limit set to 2 * theta_threshold_radians so failing observation is still within bounds + high = np.array([self.x_threshold*2, self.x_dot_threshold, np.finfo(np.float32).max, np.finfo(np.float32).max]) + + + self.action_space = spaces.Box(-np.ones(1), np.ones(1), dtype = np.float32) + + self.seed() + self.state = None + + self.steps_beyond_done = None + + def seed(self, seed=None): + self.np_random, seed = seeding.np_random(seed) + return [seed] + + def step(self, action): + assert self.action_space.contains(action), "%r (%s) invalid"%(action, type(action)) + state = self.state + x, x_dot, theta, theta_dot = state + force = self.force_mag * action[0] + self.sys.adjust(force) + + costheta = math.cos(theta) + sintheta = math.sin(theta) + + if costheta > 0: + self.up_time += 1 + self.max_up_time = np.max([self.up_time, self.max_up_time]) + + else: + self.up_time = 0 + + current_time = time.time() + tau = current_time - self.last_time + self.last_time = current_time + + new_theta, new_x = self.sys.measure() + theta_dot = (new_theta - theta) / tau + x_dot = (new_x - x) / tau + self.state = (new_x, x_dot, new_theta, theta_dot) + self.sys.add_results(new_theta, new_x, force) + + done = x < -self.x_threshold \ + or x > self.x_threshold \ + or theta_dot < -self.theta_dot_threshold \ + or theta_dot > self.theta_dot_threshold + done = bool(done) + + if not done: + reward = np.ceil(costheta) + elif self.steps_beyond_done is None: + # Pole just fell! + self.steps_beyond_done = 0 + reward = -( 100 * (np.abs(x_dot) + np.abs(theta_dot)) ) + else: + if self.steps_beyond_done == 0: + logger.warn("You are calling 'step()' even though this environment has already returned done = True. 
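# --- Editor's sketch (not part of the patch) ---
# A minimal restatement of the velocity estimation done inside step():
# positions are sampled against wall-clock time and differentiated by finite
# differences. On real hardware the sample interval tau varies from call to
# call, which is why step() measures it with time.time() instead of assuming a
# fixed control period. Names here are illustrative.
import time

class RateEstimator:
    def __init__(self, initial_value):
        self.last_value = initial_value
        self.last_time = time.time()

    def update(self, new_value):
        now = time.time()
        tau = now - self.last_time                 # actual elapsed time, not a nominal dt
        rate = (new_value - self.last_value) / tau
        self.last_value, self.last_time = new_value, now
        return rate

# est = RateEstimator(theta0); ...; theta_dot = est.update(theta1)
# --- end sketch ---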
You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.") + self.steps_beyond_done += 1 + reward = 0.0 + + return np.array(self.state), reward, done, {'max_up_time' : self.max_up_time} + + def reset(self, home = True): + if home == True: + self.sys.return_home() + time.sleep(1) + init_ang, lin = self.sys.measure() + time.sleep(0.05) + ang, lin = self.sys.measure() + self.state = (0, 0, ang, (ang-init_ang)/0.05) + + self.up_time = 0 + self.max_up_time = 0 + self.up = False + self.steps_beyond_done = None + return np.array(self.state) + + def end(self): + self.sys.deinitialize() + + +class nnQ(pt.nn.Module): + """ + Here is a basic neural network with for representing a policy + """ + + def __init__(self, stateDim, numActions, numHiddenUnits, numLayers): + super().__init__() + + InputLayer = [pt.nn.Linear(stateDim + numActions, numHiddenUnits), + pt.nn.ReLU()] + + HiddenLayers = [] + for _ in range(numLayers - 1): + HiddenLayers.append(pt.nn.Linear(numHiddenUnits, numHiddenUnits)) + HiddenLayers.append(pt.nn.ReLU()) + + + OutputLayer = [pt.nn.Linear(numHiddenUnits, 1)] + + AllLayers = InputLayer + HiddenLayers + OutputLayer + self.net = pt.nn.Sequential(*AllLayers) + + self.numActions = numActions + + def forward(self,x,a): + x = pt.tensor(x, dtype = pt.float32) + + b = pt.nn.functional.one_hot(pt.tensor(a).long(), self.numActions) + + c = b.float().detach() + y = pt.cat([x, c]) + + return self.net(y) + + +class sarsaAgent: + def __init__(self, stateDim, numActions, numHiddenUnits, numLayers, + epsilon = .1, gamma = .9, alpha = .1): + self.Q = nnQ(stateDim, numActions, numHiddenUnits, numLayers) + self.gamma = gamma + self.epsilon = epsilon + self.alpha = alpha + self.numActions = numActions + self.s_last = None + + def action(self, x): + # This is an epsilon greedy selection + a = 0 + if rnd.rand() < self.epsilon: + a = rnd.randint(0, numActions) + else: + qBest = -np.inf + for aTest in range(self.numActions): + qTest = self.Q(x, aTest).detach().numpy()[0] + if qTest > qBest: + qBest = qTest + a = aTest + return a + + def update(self, s, a, r, s_next,done): + # Compute the TD error, if there is enough data + update = True + if done: + Q_cur = self.Q(s, a).detach().numpy()[0] + delta = r - Q_cur + self.s_last = None + Q_diff = self.Q(s, a) + elif self.s_last is not None: + Q_next = self.Q(s, a).detach().numpy()[0] + Q_cur = self.Q(self.s_last, self.a_last).detach().numpy()[0] + delta = self.r_last + self.gamma * Q_next - Q_cur + Q_diff = self.Q(self.s_last, self.a_last) + else: + update = False + + # Update the parameter via the semi-gradient method + if update: + self.Q.zero_grad() + Q_diff.backward() + for p in self.Q.parameters(): + p.data.add_(self.alpha * delta, p.grad.data) + + if not done: + self.s_last = np.copy(s) + self.a_last = np.copy(a) + self.r_last = np.copy(r) + +# This is the environment +env = SwingUpEnv() + +# For simplicity, we only consider forces of -1 and 1 +numActions = 5 +Actions = np.linspace(-1, 1, numActions) + +# This is our learning agent +gamma = .95 +agent = sarsaAgent(5, numActions, 20, 1, epsilon = 5e-2, gamma = gamma, alpha = 1e-5) + +maxSteps = 5e4 + +# This is a helper to deal with the fact that x[2] is actually an angle +x_to_y = lambda x : np.array([x[0], x[1], np.cos(x[2]), np.sin(x[2]), x[3]]) + +R = [] +UpTime = [] + +step = 0 +ep = 0 +try: + while step < maxSteps: + ep += 1 + x = env.reset(home = ep > 1) + C = 0. 
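# --- Editor's sketch (not part of the patch) ---
# A compact restatement of the semi-gradient SARSA update performed in
# sarsaAgent.update(), shown on a tiny linear Q(s, a) = w[a] . s so it fits in
# a few lines. delta is the one-step TD error, and only Q(s_last, a_last) is
# differentiated while the bootstrap target is treated as a constant, which is
# what the detach()/numpy() calls in the class above accomplish. The shapes and
# hyperparameters are illustrative.
import numpy as np

def sarsa_update(w, s, a, r, s_next, a_next, gamma=0.95, alpha=1e-3, done=False):
    q_sa = w[a] @ s
    target = r if done else r + gamma * (w[a_next] @ s_next)
    delta = target - q_sa            # TD error
    w[a] += alpha * delta * s        # gradient of w[a] . s w.r.t. w[a] is just s
    return delta

w = np.zeros((5, 5))                 # 5 discrete actions, 5 state features
delta = sarsa_update(w, np.ones(5), 2, 1.0, np.ones(5), 3)
# --- end sketch ---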
+ + done = False + t = 1 + while not done: + t += 1 + step += 1 + y = x_to_y(x) + a = agent.action(y) + u = Actions[a:a+1] + x_next, c, done, info = env.step(u) + + max_up_time = info['max_up_time'] + y_next = x_to_y(x_next) + + C += (1./t) * (c - C) + agent.update(y, a, c, y_next, done) + x = x_next + if done: + break + + if step >= maxSteps: + break + + + R.append(C) + UpTime.append(max_up_time) + #print('t:',ep+1,', R:',C,', L:',t-1,', G:',G,', Q:', Q_est, 'U:', max_up_time) + print('Episode:',ep, 'Total Steps:',step, ', Ave. Reward:',C, ', Episode Length:',t-1, 'Max Up-Time:',max_up_time) +except: + env.end() + exit(-1) +finally: + env.end() + exit(0) \ No newline at end of file diff --git a/System/Pendulum/system_swingup_test_2.py b/System/Pendulum/system_swingup_test_2.py new file mode 100644 index 0000000..fc02ed6 --- /dev/null +++ b/System/Pendulum/system_swingup_test_2.py @@ -0,0 +1,395 @@ +import numpy as np +import numpy.random as rnd +import torch as pt + +import math +from gym import spaces, logger +from gym.utils import seeding + +### +import sys +sys.path.insert(0, '/home/pi/pendulum/System') +### + + +from System.system import System +#from . import System +import time +from sys import exit + +class SwingUpEnv(): + """ + Description: + A pole is attached by an un-actuated joint to a cart, which moves along a frictionless track. The pendulum starts upright, and the goal is to prevent it from falling over by increasing and reducing the cart's velocity. + + Source: + This environment corresponds to the version of the cart-pole problem described by Barto, Sutton, and Anderson + + Observation: + Type: Box(4) + Num Observation Min Max + 0 Cart Position -4.8 4.8 + 1 Cart Velocity -Inf Inf + 2 Pole Angle -Inf Inf + 3 Pole Velocity At Tip -Inf Inf + + Actions: + Type: Box(1) + Num Action Min Max + 0 Push cart -1 1 + + Note: The amount the velocity that is reduced or increased is not fixed; it depends on the angle the pole is pointing. This is because the center of gravity of the pole increases the amount of energy needed to move the cart underneath it + + Reward: + Reward is 1 for every step taken, including the termination step + + Starting State: + All observations are assigned a uniform random value in [-0.05..0.05] + + Episode Termination: + Pole Angle is more than 12 degrees + Cart Position is more than 2.4 (center of the cart reaches the edge of the display) + Episode length is greater than 200 + Solved Requirements + Considered solved when the average reward is greater than or equal to 195.0 over 100 consecutive trials. + """ + + metadata = { + 'render.modes': ['human', 'rgb_array'], + 'video.frames_per_second' : 50 + } + + def __init__(self): + self.x_threshold = 14. + self.sys = System(angular_units='Radians', positive_limit=self.x_threshold, negative_limit=-self.x_threshold, sw_limit_routine=self.x_threshold_routine) + + self.force_mag = 11. + self.last_time = time.time() # time for seconds between updates + + # Angle at which to fail the episode + self.x_dot_threshold = 10. 
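# --- Editor's sketch (not part of the patch) ---
# A quick check of the running-average trick used in the training loop:
# C <- C + (1/t) * (c - C) is the incremental form of the sample mean, so C
# tracks the average per-step reward without storing the reward history (up to
# the off-by-one in how t is initialised in the loop above).
import numpy as np

rewards = [1.0, 0.0, 1.0, 1.0, -3.0]
C, history = 0.0, []
for t, c in enumerate(rewards, start=1):
    C += (1.0 / t) * (c - C)
    history.append(C)

assert np.isclose(C, np.mean(rewards))   # incremental mean equals the batch mean
print(history)
# --- end sketch ---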
+ self.theta_dot_threshold = 3*np.pi + + # Angle limit set to 2 * theta_threshold_radians so failing observation is still within bounds + high = np.array([self.x_threshold*2, self.x_dot_threshold, np.finfo(np.float32).max, np.finfo(np.float32).max]) + + + self.action_space = spaces.Box(-np.ones(1), np.ones(1), dtype = np.float32) + + self.seed() + self.state = None + + self.steps_beyond_done = None + self.done = False + + def seed(self, seed=None): + self.np_random, seed = seeding.np_random(seed) + return [seed] + + def step(self, action): + assert self.action_space.contains(action), "%r (%s) invalid"%(action, type(action)) + state = self.state + x, x_dot, theta, theta_dot = state + force = self.force_mag * action[0] + # Do not adjust the motor further if the x_threshold has been triggered by the SW limit + if self.done == False: + self.sys.adjust(force) + + costheta = math.cos(theta) + sintheta = math.sin(theta) + + if costheta > 0: + self.up_time += 1 + self.max_up_time = np.max([self.up_time, self.max_up_time]) + + else: + self.up_time = 0 + + current_time = time.time() + tau = current_time - self.last_time + self.last_time = current_time + + new_theta, new_x = self.sys.measure() + if (theta >= 0 and theta < math.pi/2.) and (new_theta > 3.*math.pi/2.): + theta_dot = (new_theta - (theta + 2.*math.pi)) / tau + elif (new_theta >= 0 and new_theta < math.pi/2.) and (theta > 3.*math.pi/2.): + theta_dot = ((new_theta + 2.*math.pi) - theta) / tau + else: + theta_dot = (new_theta - theta) / tau + x_dot = (new_x - x) / tau + self.state = (new_x, x_dot, new_theta, theta_dot) + self.sys.add_results(new_theta, new_x, force) + + '''done = theta_dot < -self.theta_dot_threshold \ + or theta_dot > self.theta_dot_threshold \ + or self.done == True''' + done = self.done + + '''done = x < -self.x_threshold \ + or x > self.x_threshold \ + or theta_dot < -self.theta_dot_threshold \ + or theta_dot > self.theta_dot_threshold \ + or self.done == True''' + done = bool(done) + + if not done: + reward = np.ceil(costheta) + elif self.steps_beyond_done is None: + # Pole just fell! + self.steps_beyond_done = 0 + reward = -( 100 * (np.abs(x_dot) + np.abs(theta_dot)) ) + else: + if self.steps_beyond_done == 0: + logger.warn("You are calling 'step()' even though this environment has already returned done = True. 
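# --- Editor's sketch (not part of the patch) ---
# A standalone version of the wrap handling done before computing theta_dot in
# this script's step(): the two special-cased branches catch crossings of the
# 0/2*pi seam in either direction. The same effect can be obtained by wrapping
# the raw difference so it never exceeds half a revolution in magnitude; this
# is shown only to make the intent of those branches explicit.
import math

def wrapped_rate(new_theta, old_theta, tau):
    d = (new_theta - old_theta + math.pi) % (2.0 * math.pi) - math.pi
    return d / tau

# Crossing the seam: 6.2 rad -> 0.1 rad in 0.02 s is a small positive rotation,
# not a huge negative one.
print(wrapped_rate(0.1, 6.2, 0.02))   # ~ +9.2 rad/s rather than -305 rad/s
# --- end sketch ---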
You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.") + self.steps_beyond_done += 1 + reward = 0.0 + + return np.array(self.state), reward, done, {'max_up_time' : self.max_up_time} + + def x_threshold_routine(self): + print("SW Limit reached!") + self.done = True + self.sys.adjust(0) + + def reset(self, home = True): + if home == True: + self.sys.return_home() + time.sleep(1) + init_ang, lin = self.sys.measure() + time.sleep(0.05) + ang, lin = self.sys.measure() + self.state = (0, 0, ang, (ang-init_ang)/0.05) + + self.up_time = 0 + self.max_up_time = 0 + self.up = False + self.steps_beyond_done = None + self.done = False + return np.array(self.state) + + def end(self): + self.sys.deinitialize() + + def log(self, message): + self.sys.add_log(message) + print(message) + + +class nnQ(pt.nn.Module): + """ + Here is a basic neural network with for representing a policy + """ + + def __init__(self, stateDim, numActions, numHiddenUnits, numLayers): + super().__init__() + + InputLayer = [pt.nn.Linear(stateDim + numActions, numHiddenUnits), + pt.nn.ReLU()] + + HiddenLayers = [] + for _ in range(numLayers - 1): + HiddenLayers.append(pt.nn.Linear(numHiddenUnits, numHiddenUnits)) + HiddenLayers.append(pt.nn.ReLU()) + + + OutputLayer = [pt.nn.Linear(numHiddenUnits, 1)] + + AllLayers = InputLayer + HiddenLayers + OutputLayer + self.net = pt.nn.Sequential(*AllLayers) + + self.numActions = numActions + + def forward(self,x,a): + x = pt.tensor(x, dtype = pt.float32) + + b = pt.nn.functional.one_hot(pt.tensor(a).long(), self.numActions) + + c = b.float().detach() + y = pt.cat([x, c]) + + return self.net(y) + +class deepQagent: + def __init__(self,stateDim,numActions,numHiddenUnits,numLayers,epsilon=.1,gamma=.9,alpha=.1, + c = 100,batch_size=20): + self.Q = nnQ(stateDim,numActions,numHiddenUnits,numLayers) + self.Q_target = nnQ(stateDim,numActions,numHiddenUnits,numLayers) + + self.alpha = alpha + self.gamma = gamma + self.epsilon = epsilon + self.numActions = numActions + + self.D = [] + self.batch_size = batch_size + self.c = c + self.optimizer = pt.optim.SGD(self.Q.parameters(),lr=alpha) + self.counter = 0 + + def action(self,x): + # This is an epsilon greedy selection + if rnd.rand() < self.epsilon: + a = rnd.randint(numActions) + else: + qBest = -np.inf + for aTest in range(self.numActions): + qTest = self.Q(x,aTest).detach().numpy()[0] + if qTest > qBest: + qBest = qTest + a = aTest + return a + + def update(self,s,a,r,s_next,done): + self.counter += 1 + self.D.append((s,a,r,s_next,done)) + + B_ind = rnd.choice(len(self.D),size=self.batch_size) + + loss = 0. 
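# --- Editor's sketch (not part of the patch) ---
# A condensed view of the two ingredients deepQagent adds on top of SARSA:
# (1) bootstrap targets come from a separate, slowly-updated target network,
# y = r + gamma * max_a' Q_target(s', a'), and (2) that target network is
# hard-copied from the online network every c updates. The network shape here
# (one output per action) is illustrative; the class above instead feeds a
# one-hot action into a single-output network.
import torch as pt

q_net = pt.nn.Linear(4, 2)             # online Q: 4 state features -> 2 actions
q_target = pt.nn.Linear(4, 2)          # target copy used only for bootstrapping
q_target.load_state_dict(q_net.state_dict())

def td_target(r, s_next, done, gamma=0.95):
    with pt.no_grad():                  # the target is a constant w.r.t. the online net
        if done:
            return pt.tensor(r)
        return r + gamma * q_target(s_next).max()

def sync_target(step, c=100):
    if step % c == 0:                   # periodic hard copy, as in update() above
        q_target.load_state_dict(q_net.state_dict())
# --- end sketch ---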
+ + #B_ind = [-1] + for j in B_ind: + sj,aj,rj,s_next_j,done_j = self.D[j] + Q_cur = self.Q(sj,aj) + if done_j: + y = rj + else: + + Q_vals = [] + for a_next in range(self.numActions): + + Q_vals.append(self.Q_target(s_next_j,a_next).detach().numpy()[0]) + + + y = rj + self.gamma * np.max(Q_vals) + loss += .5 * (y-Q_cur)**2 / self.batch_size + self.optimizer.zero_grad() + #self.Q.zero_grad() + loss.backward() + self.optimizer.step() + + + if (self.counter % self.c) == 0: + for p, p_target in zip(self.Q.parameters(),self.Q_target.parameters()): + p_target.data = p.data.clone().detach() + +class sarsaAgent: + def __init__(self, stateDim, numActions, numHiddenUnits, numLayers, + epsilon = .1, gamma = .9, alpha = .1): + self.Q = nnQ(stateDim, numActions, numHiddenUnits, numLayers) + self.gamma = gamma + self.epsilon = epsilon + self.alpha = alpha + self.numActions = numActions + self.s_last = None + + def action(self, x): + # This is an epsilon greedy selection + a = 0 + if rnd.rand() < self.epsilon: + a = rnd.randint(0, numActions) + else: + qBest = -np.inf + for aTest in range(self.numActions): + qTest = self.Q(x, aTest).detach().numpy()[0] + if qTest > qBest: + qBest = qTest + a = aTest + return a + + def update(self, s, a, r, s_next,done): + # Compute the TD error, if there is enough data + update = True + if done: + Q_cur = self.Q(s, a).detach().numpy()[0] + delta = r - Q_cur + self.s_last = None + Q_diff = self.Q(s, a) + elif self.s_last is not None: + Q_next = self.Q(s, a).detach().numpy()[0] + Q_cur = self.Q(self.s_last, self.a_last).detach().numpy()[0] + delta = self.r_last + self.gamma * Q_next - Q_cur + Q_diff = self.Q(self.s_last, self.a_last) + else: + update = False + + # Update the parameter via the semi-gradient method + if update: + self.Q.zero_grad() + Q_diff.backward() + for p in self.Q.parameters(): + p.data.add_(self.alpha * delta, p.grad.data) + + if not done: + self.s_last = np.copy(s) + self.a_last = np.copy(a) + self.r_last = np.copy(r) + +# This is the environment +env = SwingUpEnv() + +# For simplicity, we only consider forces of -1 and 1 +numActions = 2 +Actions = np.linspace(-1, 1, numActions) + +# This is our learning agent +gamma = .95 +#agent = sarsaAgent(5, numActions, 20, 1, epsilon = 5e-2, gamma = gamma, alpha = 1e-5) +agent = deepQagent(5,numActions,20,2,epsilon=5e-2,gamma=gamma,batch_size=20, + c= 100,alpha=1e-4) + +maxSteps = 2e6 + +# This is a helper to deal with the fact that x[2] is actually an angle +x_to_y = lambda x : np.array([x[0], x[1], np.cos(x[2]), np.sin(x[2]), x[3]]) + +R = [] +UpTime = [] + +step = 0 +ep = 0 +maxLen = 500 +try: + while step < maxSteps: + ep += 1 + x = env.reset(home = ep > 1) + C = 0. + + done = False + t = 1 + while not done: + t += 1 + step += 1 + y = x_to_y(x) + a = agent.action(y) + u = Actions[a:a+1] + x_next, c, done, info = env.step(u) + + max_up_time = info['max_up_time'] + y_next = x_to_y(x_next) + + C += (1./t) * (c - C) + agent.update(y, a, c, y_next, done) + x = x_next + if done: + break + + if step >= maxSteps: + break + + if t > maxLen: + agent.s_last = None + break + + + R.append(C) + UpTime.append(max_up_time) + #print('t:',ep+1,', R:',C,', L:',t-1,', G:',G,', Q:', Q_est, 'U:', max_up_time) + log = "Episode:" + str(ep) + " Total Steps:" + str(step) + " Ave. Reward:" + str(C) + " Episode Length:" + str(t-1) + " Max Up-Time:" + str(max_up_time) + env.log(log) +except: + env.end() + exit(-1) +finally: + env.end() + exit(0) -- cgit v1.2.3
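# --- Editor's sketch (not part of the patch) ---
# A possible restructuring of the shutdown handling that closes both training
# scripts (sketch only; run_training stands in for the "while step < maxSteps"
# loop and env for the SwingUpEnv instance). As written, the finally block
# deinitializes the system a second time and its exit(0) overrides the exit(-1)
# raised in the except branch, so the process reports success even after a
# crash. Letting finally own the cleanup once and deriving the exit code
# separately avoids both problems.
import sys
import traceback

def shutdown_safely(run_training, env):
    exit_code = 0
    try:
        run_training()
    except KeyboardInterrupt:
        pass                       # a manual stop is not an error
    except Exception:
        traceback.print_exc()
        exit_code = -1
    finally:
        env.end()                  # deinitialize exactly once
    sys.exit(exit_code)
# --- end sketch ---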