1 files changed, 25 insertions, 9 deletions
diff --git a/System/system_swingup_test_2.py b/System/system_swingup_test_2.py
index 81d5419..5c0ac5a 100644
--- a/System/system_swingup_test_2.py
+++ b/System/system_swingup_test_2.py
@@ -53,13 +53,13 @@ class SwingUpEnv():
     }
 
     def __init__(self):
-        self.sys = System(angular_units='Radians')
+        self.x_threshold = 14.
+        self.sys = System(angular_units='Radians', positive_limit=self.x_threshold, negative_limit=-self.x_threshold, sw_limit_routine=self.x_threshold_routine)
 		
-        self.force_mag = 10.
+        self.force_mag = 11.
         self.last_time = time.time()  # time for seconds between updates
 
         # Angle at which to fail the episode
-        self.x_threshold = 10.
         self.x_dot_threshold = 10.
         self.theta_dot_threshold = 3*np.pi
 
@@ -73,6 +73,7 @@ class SwingUpEnv():
         self.state = None
 
         self.steps_beyond_done = None
+        self.done = False
 
     def seed(self, seed=None):
         self.np_random, seed = seeding.np_random(seed)
@@ -83,7 +84,9 @@ class SwingUpEnv():
         state = self.state
         x, x_dot, theta, theta_dot = state
         force = self.force_mag * action[0]
-        self.sys.adjust(force)
+        # Do not adjust the motor further if the x_threshold has been triggered by the SW limit
+        if self.done == False:
+            self.sys.adjust(force)
 
         costheta = math.cos(theta)
         sintheta = math.sin(theta)
@@ -105,10 +108,15 @@ class SwingUpEnv():
         self.state = (new_x, x_dot, new_theta, theta_dot)
         self.sys.add_results(new_theta, new_x, force)
 
-        done =  x < -self.x_threshold \
+        done =  theta_dot < -self.theta_dot_threshold \
+                or theta_dot > self.theta_dot_threshold \
+                or self.done == True
+        
+        '''done =  x < -self.x_threshold \
                 or x > self.x_threshold \
                 or theta_dot < -self.theta_dot_threshold \
-                or theta_dot > self.theta_dot_threshold
+                or theta_dot > self.theta_dot_threshold \
+                or self.done == True'''
         done = bool(done)
 
         if not done:
@@ -125,6 +133,11 @@ class SwingUpEnv():
 
         return np.array(self.state), reward, done, {'max_up_time' : self.max_up_time}
 
+    def x_threshold_routine(self):
+        print("SW Limit reached!")
+        self.done = True
+        self.sys.adjust(0)
+
     def reset(self, home = True):
         if home == True:
             self.sys.return_home()
@@ -138,6 +151,7 @@ class SwingUpEnv():
         self.max_up_time = 0
         self.up = False
         self.steps_beyond_done = None
+        self.done = False
         return np.array(self.state)
         
     def end(self):
@@ -298,14 +312,16 @@ class sarsaAgent:
 env = SwingUpEnv()
 
 # For simplicity, we only consider forces of -1 and 1
-numActions = 5
+numActions = 2
 Actions = np.linspace(-1, 1, numActions)
 
 # This is our learning agent
 gamma = .95
-agent = sarsaAgent(5, numActions, 20, 1, epsilon = 5e-2, gamma = gamma, alpha = 1e-5)
+#agent = sarsaAgent(5, numActions, 20, 1, epsilon = 5e-2, gamma = gamma, alpha = 1e-5)
+agent = deepQagent(5,numActions,20,2,epsilon=5e-2,gamma=gamma,batch_size=20,
+                   c= 100,alpha=1e-4)
 
-maxSteps = 5e4
+maxSteps = 2e5
 
 # This is a helper to deal with the fact that x[2] is actually an angle
 x_to_y = lambda x : np.array([x[0], x[1], np.cos(x[2]), np.sin(x[2]), x[3]])