From d851617a9df12ee6971de8d2c99e43a4d4fd6d27 Mon Sep 17 00:00:00 2001
From: Raspberry Pi <raspberrypi@umn.edu>
Date: Sat, 16 Nov 2019 23:03:46 -0600
Subject: Updated encoder to have an offset when zeroing (allows the initial
 zero to be set to upright instead of hanging). Updated system to allow
 different angular units to be used (passed as a constructor argument, then
 passed to encoder.read_position when used). Bug fixes to system_swingup_test.
 Swingup test now runs properly (and almost actually did a swing-up at one
 point); couldn't keep testing because the system accidentally destroyed
 itself, so called it time to stop to let the glue dry.

---
 System_Python/system_swingup_test.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'System_Python/system_swingup_test.py')

diff --git a/System_Python/system_swingup_test.py b/System_Python/system_swingup_test.py
index e4d3a72..7a9b5bd 100644
--- a/System_Python/system_swingup_test.py
+++ b/System_Python/system_swingup_test.py
@@ -52,7 +52,7 @@ class SwingUpEnv():
     }
 
     def __init__(self):
-        self.sys = System()
+        self.sys = System(angular_units='Radians')
 		
         self.force_mag = 10.0
         self.tau = 0.02  # seconds between state updates
@@ -95,10 +95,10 @@ class SwingUpEnv():
             self.up_time = 0
         
         new_theta, new_x = self.sys.measure()
-        new_theta = radians(new_theta)
         theta_dot = (new_theta - theta) / self.tau
         x_dot = (new_x - x) / self.tau
         self.state = (new_x, x_dot, new_theta, theta_dot)
+        self.sys.add_results(new_theta, new_x, force)
 
         done =  x < -self.x_threshold \
                 or x > self.x_threshold \
@@ -159,7 +159,7 @@ class nnQ(pt.nn.Module):
     def forward(self,x,a):
         x = pt.tensor(x, dtype = pt.float32)
 
-        b = pt.nn.functional.one_hot(pt.tensor(a), self.numActions)
+        b = pt.nn.functional.one_hot(pt.tensor(a).long(), self.numActions)
         
         c = b.float().detach()
         y = pt.cat([x, c])
@@ -179,6 +179,7 @@ class sarsaAgent:
 
     def action(self, x):
         # This is an epsilon greedy selection
+        a = 0
         if rnd.rand() < self.epsilon:
             a = rnd.randint(numActions)
         else:
@@ -252,7 +253,6 @@ while step < maxSteps:
         y = x_to_y(x)
         a = agent.action(y)
         u = Actions[a:a+1]
-        env.render()
         x_next, c, done, info = env.step(u)
         
         max_up_time = info['max_up_time']
-- 
cgit v1.2.3