diff options
author | Raspberry Pi <raspberrypi@umn.edu> | 2019-11-26 19:31:10 -0600 |
---|---|---|
committer | Raspberry Pi <raspberrypi@umn.edu> | 2019-11-26 19:31:10 -0600 |
commit | 83823aebece092a845bf1b9eae44175f91f7115d (patch) | |
tree | 5de439098f7287d7e9c3f466dd33385abcf137cd /System/system_swingup_test_2.py | |
parent | Modify test_System to verify that threaded encoder measurements are working a... (diff) | |
download | ee4511w-83823aebece092a845bf1b9eae44175f91f7115d.tar ee4511w-83823aebece092a845bf1b9eae44175f91f7115d.tar.gz ee4511w-83823aebece092a845bf1b9eae44175f91f7115d.tar.bz2 ee4511w-83823aebece092a845bf1b9eae44175f91f7115d.tar.lz ee4511w-83823aebece092a845bf1b9eae44175f91f7115d.tar.xz ee4511w-83823aebece092a845bf1b9eae44175f91f7115d.tar.zst ee4511w-83823aebece092a845bf1b9eae44175f91f7115d.zip |
Got threading and SW limits working really well. There might still be some shakiness on deinitialization (need to coordinate GPIO.cleanup with threads), but the program does exit and return home normally. The swing-up test now runs very smoothly, and the SW limits were increased thanks to the drastically improved response time. Pushing to merge back to master.
Diffstat (limited to '')
-rw-r--r-- | System/system_swingup_test_2.py | 15 |
1 files changed, 9 insertions, 6 deletions
diff --git a/System/system_swingup_test_2.py b/System/system_swingup_test_2.py index e240b01..5c0ac5a 100644 --- a/System/system_swingup_test_2.py +++ b/System/system_swingup_test_2.py @@ -53,13 +53,13 @@ class SwingUpEnv(): }
def __init__(self):
- self.sys = System(angular_units='Radians', positive_limit=10., negative_limit=-10., sw_limit_routine=self.x_threshold_routine)
+ self.x_threshold = 14.
+ self.sys = System(angular_units='Radians', positive_limit=self.x_threshold, negative_limit=-self.x_threshold, sw_limit_routine=self.x_threshold_routine)
- self.force_mag = 10.
+ self.force_mag = 11.
self.last_time = time.time() # time for seconds between updates
# Angle at which to fail the episode
- self.x_threshold = 10.
self.x_dot_threshold = 10.
self.theta_dot_threshold = 3*np.pi
@@ -134,6 +134,7 @@ class SwingUpEnv(): return np.array(self.state), reward, done, {'max_up_time' : self.max_up_time}
def x_threshold_routine(self):
+ print("SW Limit reached!")
self.done = True
self.sys.adjust(0)
@@ -311,14 +312,16 @@ class sarsaAgent: env = SwingUpEnv()
# For simplicity, we only consider forces of -1 and 1
-numActions = 5
+numActions = 2
Actions = np.linspace(-1, 1, numActions)
# This is our learning agent
gamma = .95
-agent = sarsaAgent(5, numActions, 20, 1, epsilon = 5e-2, gamma = gamma, alpha = 1e-5)
+#agent = sarsaAgent(5, numActions, 20, 1, epsilon = 5e-2, gamma = gamma, alpha = 1e-5)
+agent = deepQagent(5,numActions,20,2,epsilon=5e-2,gamma=gamma,batch_size=20,
+ c= 100,alpha=1e-4)
-maxSteps = 5e4
+maxSteps = 2e5
# This is a helper to deal with the fact that x[2] is actually an angle
x_to_y = lambda x : np.array([x[0], x[1], np.cos(x[2]), np.sin(x[2]), x[3]])
|