From 2e4ab26e749a8ec17f6ab2de10722dd9031a3ae4 Mon Sep 17 00:00:00 2001
From: Raspberry Pi <raspberrypi@umn.edu>
Date: Tue, 19 Nov 2019 22:19:55 -0600
Subject: Fixed encoder zeroing. Other minor changes, still trying to get the
 swingup controller working. Not sure if the code is good enough to make it
 happen...

---
 System_Python/encoder.py             |  3 +-
 System_Python/system.py              | 17 +++++--
 System_Python/system_swingup_test.py | 98 +++++++++++++++++++++---------------
 System_Python/test_Return_Home.py    | 16 ++++++
 4 files changed, 90 insertions(+), 44 deletions(-)
 create mode 100644 System_Python/test_Return_Home.py

(limited to 'System_Python')

diff --git a/System_Python/encoder.py b/System_Python/encoder.py
index 63bc73e..ece7fd1 100644
--- a/System_Python/encoder.py
+++ b/System_Python/encoder.py
@@ -34,7 +34,8 @@ class Encoder:
         time.sleep(0.5)
     def set_zero(self, offset = 0):
         # Take current position as zero
-        self.offset = self.read_position('Raw') - offset
+        pos = self.read_position('Raw')
+        self.offset = (self.read_position('Raw') + self.offset + offset) % 1024
     def clockup(self):
         GPIO.output(self.clk_pin, 1)
     def clockdown(self):
diff --git a/System_Python/system.py b/System_Python/system.py
index b5a68cc..c6145ce 100644
--- a/System_Python/system.py
+++ b/System_Python/system.py
@@ -94,7 +94,7 @@ class System:
         pressed = True
         while pressed != False:
             # We must continue reading linear encoder motion to keep track of rotations
-            self.encoder_linear.read_position()
+            position = self.encoder_linear.read_position()
             pressed = GPIO.input(limit_positive_pin)
             sleep(0.01)
         #GPIO.wait_for_edge(limit_positive_pin, GPIO.FALLING)
@@ -116,6 +116,12 @@ class System:
         GPIO.add_event_detect(limit_positive_pin, GPIO.FALLING, callback=self.positive_limit_callback, bouncetime=300)
     # END initialize
     
+    # Return home, cleanup IO. This should be called when exiting the program
+    def deinitialize(self):
+        self.return_home()
+        self.motor.brake()
+        GPIO.cleanup()
+    
     # Get the values of the encoders to determine the angular and linear position of the pendulum.
     # Values are returned as a tuple: (angle, linear).
     ### angle: 0 indicates the pendulum is exactly straight up.
@@ -185,11 +191,16 @@ class System:
             self.motor.move(-4)
             while position > 0:
                 position = self.encoder_linear.read_position()
+                sleep(0.01)
+            self.motor.brake()
+            return
         elif position < 0:
             self.motor.move(4)
             while position < 0:
                 position = self.encoder_linear.read_position()
-        self.motor.brake()
+                sleep(0.01)
+            self.motor.brake()
+            return
     # END return_home
     
     # Callback for when negative limit switch is triggered.
@@ -214,7 +225,7 @@ class System:
     # END positive_limit_callback
     def limit_triggered(self):
         sleep(1)
-        self.return_home()
+        self.deinitialize()
         sys.exit(1)
 # END System
 
diff --git a/System_Python/system_swingup_test.py b/System_Python/system_swingup_test.py
index 7a9b5bd..e13c7ca 100644
--- a/System_Python/system_swingup_test.py
+++ b/System_Python/system_swingup_test.py
@@ -8,6 +8,7 @@ from gym.utils import seeding
 
 from system import System
 import time
+from sys import exit
 
 class SwingUpEnv():
     """
@@ -54,8 +55,8 @@ class SwingUpEnv():
     def __init__(self):
         self.sys = System(angular_units='Radians')
 		
-        self.force_mag = 10.0
-        self.tau = 0.02  # seconds between state updates
+        self.force_mag = 10.
+        self.last_time = time.time()  # time for seconds between updates
 
         # Angle at which to fail the episode
         self.x_threshold = 10.
@@ -93,10 +94,14 @@ class SwingUpEnv():
 
         else:
             self.up_time = 0
+
+        current_time = time.time()
+        tau = current_time - self.last_time
+        self.last_time = current_time
         
         new_theta, new_x = self.sys.measure()
-        theta_dot = (new_theta - theta) / self.tau
-        x_dot = (new_x - x) / self.tau
+        theta_dot = (new_theta - theta) / tau
+        x_dot = (new_x - x) / tau
         self.state = (new_x, x_dot, new_theta, theta_dot)
         self.sys.add_results(new_theta, new_x, force)
 
@@ -120,16 +125,23 @@ class SwingUpEnv():
 
         return np.array(self.state), reward, done, {'max_up_time' : self.max_up_time}
 
-    def reset(self):
-        self.sys.return_home()
+    def reset(self, home = True):
+        if home == True:
+            self.sys.return_home()
         time.sleep(1)
-        self.state = (0, 0, np.pi, 0)
+        init_ang, lin = self.sys.measure()
+        time.sleep(0.05)
+        ang, lin = self.sys.measure()
+        self.state = (0, 0, ang, (ang-init_ang)/0.05)
 
         self.up_time = 0
         self.max_up_time = 0
         self.up = False
         self.steps_beyond_done = None
         return np.array(self.state)
+        
+    def end(self):
+        self.sys.deinitialize()
 
 
 class nnQ(pt.nn.Module):
@@ -181,7 +193,7 @@ class sarsaAgent:
         # This is an epsilon greedy selection
         a = 0
         if rnd.rand() < self.epsilon:
-            a = rnd.randint(numActions)
+            a = rnd.randint(0, numActions)
         else:
             qBest = -np.inf
             for aTest in range(self.numActions):
@@ -223,14 +235,14 @@ class sarsaAgent:
 env = SwingUpEnv()
 
 # For simplicity, we only consider forces of -1 and 1
-numActions = 2
+numActions = 5
 Actions = np.linspace(-1, 1, numActions)
 
 # This is our learning agent
 gamma = .95
 agent = sarsaAgent(5, numActions, 20, 1, epsilon = 5e-2, gamma = gamma, alpha = 1e-5)
 
-maxSteps = 2e5
+maxSteps = 5e4
 
 # This is a helper to deal with the fact that x[2] is actually an angle
 x_to_y = lambda x : np.array([x[0], x[1], np.cos(x[2]), np.sin(x[2]), x[3]])
@@ -240,36 +252,42 @@ UpTime = []
 
 step = 0
 ep = 0
-while step < maxSteps:
-    ep += 1
-    x = env.reset()
-    C = 0.
-    
-    done = False
-    t = 1
-    while not done:
-        t += 1
-        step += 1
-        y = x_to_y(x)
-        a = agent.action(y)
-        u = Actions[a:a+1]
-        x_next, c, done, info = env.step(u)
+try:
+    while step < maxSteps:
+        ep += 1
+        x = env.reset(home = ep > 1)
+        C = 0.
         
-        max_up_time = info['max_up_time']
-        y_next = x_to_y(x_next)
-
-        C += (1./t) * (c - C)
-        agent.update(y, a, c, y_next, done)
-        x = x_next
-        if done:
-            break
+        done = False
+        t = 1
+        while not done:
+            t += 1
+            step += 1
+            y = x_to_y(x)
+            a = agent.action(y)
+            u = Actions[a:a+1]
+            x_next, c, done, info = env.step(u)
             
-        if step >= maxSteps:
-            break
+            max_up_time = info['max_up_time']
+            y_next = x_to_y(x_next)
+
+            C += (1./t) * (c - C)
+            agent.update(y, a, c, y_next, done)
+            x = x_next
+            if done:
+                break
+                
+            if step >= maxSteps:
+                break
+                
             
-        
-        R.append(C)
-    UpTime.append(max_up_time)
-    #print('t:',ep+1,', R:',C,', L:',t-1,', G:',G,', Q:', Q_est, 'U:', max_up_time)
-    print('Episode:',ep, 'Total Steps:',step, ', Ave. Reward:',C, ', Episode Length:',t-1, 'Max Up-Time:',max_up_time)
-env.close()
+            R.append(C)
+        UpTime.append(max_up_time)
+        #print('t:',ep+1,', R:',C,', L:',t-1,', G:',G,', Q:', Q_est, 'U:', max_up_time)
+        print('Episode:',ep, 'Total Steps:',step, ', Ave. Reward:',C, ', Episode Length:',t-1, 'Max Up-Time:',max_up_time)
+except:
+    env.end()
+    exit(-1)
+finally:
+    env.end()
+    exit(0)
\ No newline at end of file
diff --git a/System_Python/test_Return_Home.py b/System_Python/test_Return_Home.py
new file mode 100644
index 0000000..244f99f
--- /dev/null
+++ b/System_Python/test_Return_Home.py
@@ -0,0 +1,16 @@
+from system import System
+from time import sleep
+        
+# Main program
+sys = System()
+sys.initialize()
+
+ang,lin = sys.measure()
+print("Starting position before moving: " + str(lin))
+sys.adjust(4)
+ang,lin = sys.measure()
+sleep(0.01)
+while lin < 10:
+    ang,lin = sys.measure()
+    sleep(0.01)
+sys.return_home()
-- 
cgit v1.2.3