aboutsummaryrefslogtreecommitdiffstats
path: root/System/system_swingup_test_2.py
diff options
context:
space:
mode:
authorRaspberry Pi <raspberrypi@umn.edu>2019-11-26 19:31:10 -0600
committerRaspberry Pi <raspberrypi@umn.edu>2019-11-26 19:31:10 -0600
commit83823aebece092a845bf1b9eae44175f91f7115d (patch)
tree5de439098f7287d7e9c3f466dd33385abcf137cd /System/system_swingup_test_2.py
parentModify test_System to verify that threaded encoder measurements are working a... (diff)
downloadee4511w-83823aebece092a845bf1b9eae44175f91f7115d.tar
ee4511w-83823aebece092a845bf1b9eae44175f91f7115d.tar.gz
ee4511w-83823aebece092a845bf1b9eae44175f91f7115d.tar.bz2
ee4511w-83823aebece092a845bf1b9eae44175f91f7115d.tar.lz
ee4511w-83823aebece092a845bf1b9eae44175f91f7115d.tar.xz
ee4511w-83823aebece092a845bf1b9eae44175f91f7115d.tar.zst
ee4511w-83823aebece092a845bf1b9eae44175f91f7115d.zip
Got threading and SW limits working really well. There may still be some shakiness on deinitialization (GPIO.cleanup needs to be coordinated with the threads), but the program does exit and return home normally. The swingup test now runs very smoothly, and the SW limits were increased thanks to the drastically improved response time. Pushing to merge back to master.
Diffstat (limited to 'System/system_swingup_test_2.py')
-rw-r--r--System/system_swingup_test_2.py15
1 files changed, 9 insertions, 6 deletions
diff --git a/System/system_swingup_test_2.py b/System/system_swingup_test_2.py
index e240b01..5c0ac5a 100644
--- a/System/system_swingup_test_2.py
+++ b/System/system_swingup_test_2.py
@@ -53,13 +53,13 @@ class SwingUpEnv():
}
def __init__(self):
- self.sys = System(angular_units='Radians', positive_limit=10., negative_limit=-10., sw_limit_routine=self.x_threshold_routine)
+ self.x_threshold = 14.
+ self.sys = System(angular_units='Radians', positive_limit=self.x_threshold, negative_limit=-self.x_threshold, sw_limit_routine=self.x_threshold_routine)
- self.force_mag = 10.
+ self.force_mag = 11.
self.last_time = time.time() # time for seconds between updates
# Angle at which to fail the episode
- self.x_threshold = 10.
self.x_dot_threshold = 10.
self.theta_dot_threshold = 3*np.pi
@@ -134,6 +134,7 @@ class SwingUpEnv():
return np.array(self.state), reward, done, {'max_up_time' : self.max_up_time}
def x_threshold_routine(self):
+ print("SW Limit reached!")
self.done = True
self.sys.adjust(0)
@@ -311,14 +312,16 @@ class sarsaAgent:
env = SwingUpEnv()
# For simplicity, we only consider forces of -1 and 1
-numActions = 5
+numActions = 2
Actions = np.linspace(-1, 1, numActions)
# This is our learning agent
gamma = .95
-agent = sarsaAgent(5, numActions, 20, 1, epsilon = 5e-2, gamma = gamma, alpha = 1e-5)
+#agent = sarsaAgent(5, numActions, 20, 1, epsilon = 5e-2, gamma = gamma, alpha = 1e-5)
+agent = deepQagent(5,numActions,20,2,epsilon=5e-2,gamma=gamma,batch_size=20,
+ c= 100,alpha=1e-4)
-maxSteps = 5e4
+maxSteps = 2e5
# This is a helper to deal with the fact that x[2] is actually an angle
x_to_y = lambda x : np.array([x[0], x[1], np.cos(x[2]), np.sin(x[2]), x[3]])