diff options
author | Raspberry Pi <raspberrypi@umn.edu> | 2019-11-16 23:03:46 -0600 |
---|---|---|
committer | Raspberry Pi <raspberrypi@umn.edu> | 2019-11-16 23:03:46 -0600 |
commit | d851617a9df12ee6971de8d2c99e43a4d4fd6d27 (patch) | |
tree | 994ec9e71b98f66452253cb39650e496fb6c32d6 /System_Python/system_swingup_test.py | |
parent | Fixed some indentation issues and leftover from porting. Compiles and potenti... (diff) | |
download | ee4511w-d851617a9df12ee6971de8d2c99e43a4d4fd6d27.tar ee4511w-d851617a9df12ee6971de8d2c99e43a4d4fd6d27.tar.gz ee4511w-d851617a9df12ee6971de8d2c99e43a4d4fd6d27.tar.bz2 ee4511w-d851617a9df12ee6971de8d2c99e43a4d4fd6d27.tar.lz ee4511w-d851617a9df12ee6971de8d2c99e43a4d4fd6d27.tar.xz ee4511w-d851617a9df12ee6971de8d2c99e43a4d4fd6d27.tar.zst ee4511w-d851617a9df12ee6971de8d2c99e43a4d4fd6d27.zip |
Updated encoder to have an offset when zeroing (allows that initial zero can be set to upright instead of hanging). Updated system to allow for different angular units to be used (passed by argument on constructor, then passed to encoder.read_position when used). Bug fixes to system_swingup_test. Swingup test now runs properly (and almost actually did a swing up at one point), couldn't keep testing because the system accidentally destroyed itself, so called it time to stop to let glue dry....
Diffstat (limited to 'System_Python/system_swingup_test.py')
-rw-r--r-- | System_Python/system_swingup_test.py | 8 |
1 files changed, 4 insertions, 4 deletions
diff --git a/System_Python/system_swingup_test.py b/System_Python/system_swingup_test.py index e4d3a72..7a9b5bd 100644 --- a/System_Python/system_swingup_test.py +++ b/System_Python/system_swingup_test.py @@ -52,7 +52,7 @@ class SwingUpEnv(): }
def __init__(self):
- self.sys = System()
+ self.sys = System(angular_units='Radians')
self.force_mag = 10.0
self.tau = 0.02 # seconds between state updates
@@ -95,10 +95,10 @@ class SwingUpEnv(): self.up_time = 0
new_theta, new_x = self.sys.measure()
- new_theta = radians(new_theta)
theta_dot = (new_theta - theta) / self.tau
x_dot = (new_x - x) / self.tau
self.state = (new_x, x_dot, new_theta, theta_dot)
+ self.sys.add_results(new_theta, new_x, force)
done = x < -self.x_threshold \
or x > self.x_threshold \
@@ -159,7 +159,7 @@ class nnQ(pt.nn.Module): def forward(self,x,a):
x = pt.tensor(x, dtype = pt.float32)
- b = pt.nn.functional.one_hot(pt.tensor(a), self.numActions)
+ b = pt.nn.functional.one_hot(pt.tensor(a).long(), self.numActions)
c = b.float().detach()
y = pt.cat([x, c])
@@ -179,6 +179,7 @@ class sarsaAgent: def action(self, x):
# This is an epsilon greedy selection
+ a = 0
if rnd.rand() < self.epsilon:
a = rnd.randint(numActions)
else:
@@ -252,7 +253,6 @@ while step < maxSteps: y = x_to_y(x)
a = agent.action(y)
u = Actions[a:a+1]
- env.render()
x_next, c, done, info = env.step(u)
max_up_time = info['max_up_time']
|