aboutsummaryrefslogtreecommitdiffstats
path: root/System_Python/system_swingup_test.py
diff options
context:
space:
mode:
authorRaspberry Pi <raspberrypi@umn.edu>2019-11-16 23:03:46 -0600
committerRaspberry Pi <raspberrypi@umn.edu>2019-11-16 23:03:46 -0600
commitd851617a9df12ee6971de8d2c99e43a4d4fd6d27 (patch)
tree994ec9e71b98f66452253cb39650e496fb6c32d6 /System_Python/system_swingup_test.py
parentFixed some indentation issues and leftover from porting. Compiles and potenti... (diff)
downloadee4511w-d851617a9df12ee6971de8d2c99e43a4d4fd6d27.tar
ee4511w-d851617a9df12ee6971de8d2c99e43a4d4fd6d27.tar.gz
ee4511w-d851617a9df12ee6971de8d2c99e43a4d4fd6d27.tar.bz2
ee4511w-d851617a9df12ee6971de8d2c99e43a4d4fd6d27.tar.lz
ee4511w-d851617a9df12ee6971de8d2c99e43a4d4fd6d27.tar.xz
ee4511w-d851617a9df12ee6971de8d2c99e43a4d4fd6d27.tar.zst
ee4511w-d851617a9df12ee6971de8d2c99e43a4d4fd6d27.zip
Updated encoder to have an offset when zeroing (allows that initial zero can be set to upright instead of hanging). Updated system to allow for different angular units to be used (passed by argument on constructor, then passed to encoder.read_position when used). Bug fixes to system_swingup_test. Swingup test now runs properly (and almost actually did a swing up at one point), couldn't keep testing because the system accidentally destroyed itself, so called it time to stop to let glue dry....
Diffstat (limited to '')
-rw-r--r--System_Python/system_swingup_test.py8
1 files changed, 4 insertions, 4 deletions
diff --git a/System_Python/system_swingup_test.py b/System_Python/system_swingup_test.py
index e4d3a72..7a9b5bd 100644
--- a/System_Python/system_swingup_test.py
+++ b/System_Python/system_swingup_test.py
@@ -52,7 +52,7 @@ class SwingUpEnv():
}
def __init__(self):
- self.sys = System()
+ self.sys = System(angular_units='Radians')
self.force_mag = 10.0
self.tau = 0.02 # seconds between state updates
@@ -95,10 +95,10 @@ class SwingUpEnv():
self.up_time = 0
new_theta, new_x = self.sys.measure()
- new_theta = radians(new_theta)
theta_dot = (new_theta - theta) / self.tau
x_dot = (new_x - x) / self.tau
self.state = (new_x, x_dot, new_theta, theta_dot)
+ self.sys.add_results(new_theta, new_x, force)
done = x < -self.x_threshold \
or x > self.x_threshold \
@@ -159,7 +159,7 @@ class nnQ(pt.nn.Module):
def forward(self,x,a):
x = pt.tensor(x, dtype = pt.float32)
- b = pt.nn.functional.one_hot(pt.tensor(a), self.numActions)
+ b = pt.nn.functional.one_hot(pt.tensor(a).long(), self.numActions)
c = b.float().detach()
y = pt.cat([x, c])
@@ -179,6 +179,7 @@ class sarsaAgent:
def action(self, x):
# This is an epsilon greedy selection
+ a = 0
if rnd.rand() < self.epsilon:
a = rnd.randint(numActions)
else:
@@ -252,7 +253,6 @@ while step < maxSteps:
y = x_to_y(x)
a = agent.action(y)
u = Actions[a:a+1]
- env.render()
x_next, c, done, info = env.step(u)
max_up_time = info['max_up_time']