From 2e4ab26e749a8ec17f6ab2de10722dd9031a3ae4 Mon Sep 17 00:00:00 2001 From: Raspberry Pi Date: Tue, 19 Nov 2019 22:19:55 -0600 Subject: Fixed encoder zeroing. Other minor changes, still trying to get the swingup controller working. Not sure if the code is good enough to make it happen... --- System_Python/encoder.py | 3 +- System_Python/system.py | 17 +++++-- System_Python/system_swingup_test.py | 98 +++++++++++++++++++++--------------- System_Python/test_Return_Home.py | 16 ++++++ 4 files changed, 90 insertions(+), 44 deletions(-) create mode 100644 System_Python/test_Return_Home.py (limited to 'System_Python') diff --git a/System_Python/encoder.py b/System_Python/encoder.py index 63bc73e..ece7fd1 100644 --- a/System_Python/encoder.py +++ b/System_Python/encoder.py @@ -34,7 +34,8 @@ class Encoder: time.sleep(0.5) def set_zero(self, offset = 0): # Take current position as zero - self.offset = self.read_position('Raw') - offset + pos = self.read_position('Raw') + self.offset = (self.read_position('Raw') + self.offset + offset) % 1024 def clockup(self): GPIO.output(self.clk_pin, 1) def clockdown(self): diff --git a/System_Python/system.py b/System_Python/system.py index b5a68cc..c6145ce 100644 --- a/System_Python/system.py +++ b/System_Python/system.py @@ -94,7 +94,7 @@ class System: pressed = True while pressed != False: # We must continue reading linear encoder motion to keep track of rotations - self.encoder_linear.read_position() + position = self.encoder_linear.read_position() pressed = GPIO.input(limit_positive_pin) sleep(0.01) #GPIO.wait_for_edge(limit_positive_pin, GPIO.FALLING) @@ -116,6 +116,12 @@ class System: GPIO.add_event_detect(limit_positive_pin, GPIO.FALLING, callback=self.positive_limit_callback, bouncetime=300) # END initialize + # Return home, cleanup IO. This should be called when exiting the program + def deinitialize(self): + self.return_home() + self.motor.brake() + GPIO.cleanup() + # Get the values of the encoders to determine the angular and linear position of the pendulum. # Values are returned as a tuple: (angle, linear). ### angle: 0 indicates the pendulum is exactly straight up. @@ -185,11 +191,16 @@ class System: self.motor.move(-4) while position > 0: position = self.encoder_linear.read_position() + sleep(0.01) + self.motor.brake() + return elif position < 0: self.motor.move(4) while position < 0: position = self.encoder_linear.read_position() - self.motor.brake() + sleep(0.01) + self.motor.brake() + return # END return_home # Callback for when negative limit switch is triggered. @@ -214,7 +225,7 @@ class System: # END positive_limit_callback def limit_triggered(self): sleep(1) - self.return_home() + self.deinitialize() sys.exit(1) # END System diff --git a/System_Python/system_swingup_test.py b/System_Python/system_swingup_test.py index 7a9b5bd..e13c7ca 100644 --- a/System_Python/system_swingup_test.py +++ b/System_Python/system_swingup_test.py @@ -8,6 +8,7 @@ from gym.utils import seeding from system import System import time +from sys import exit class SwingUpEnv(): """ @@ -54,8 +55,8 @@ class SwingUpEnv(): def __init__(self): self.sys = System(angular_units='Radians') - self.force_mag = 10.0 - self.tau = 0.02 # seconds between state updates + self.force_mag = 10. + self.last_time = time.time() # time for seconds between updates # Angle at which to fail the episode self.x_threshold = 10. @@ -93,10 +94,14 @@ class SwingUpEnv(): else: self.up_time = 0 + + current_time = time.time() + tau = current_time - self.last_time + self.last_time = current_time new_theta, new_x = self.sys.measure() - theta_dot = (new_theta - theta) / self.tau - x_dot = (new_x - x) / self.tau + theta_dot = (new_theta - theta) / tau + x_dot = (new_x - x) / tau self.state = (new_x, x_dot, new_theta, theta_dot) self.sys.add_results(new_theta, new_x, force) @@ -120,16 +125,23 @@ class SwingUpEnv(): return np.array(self.state), reward, done, {'max_up_time' : self.max_up_time} - def reset(self): - self.sys.return_home() + def reset(self, home = True): + if home == True: + self.sys.return_home() time.sleep(1) - self.state = (0, 0, np.pi, 0) + init_ang, lin = self.sys.measure() + time.sleep(0.05) + ang, lin = self.sys.measure() + self.state = (0, 0, ang, (ang-init_ang)/0.05) self.up_time = 0 self.max_up_time = 0 self.up = False self.steps_beyond_done = None return np.array(self.state) + + def end(self): + self.sys.deinitialize() class nnQ(pt.nn.Module): @@ -181,7 +193,7 @@ class sarsaAgent: # This is an epsilon greedy selection a = 0 if rnd.rand() < self.epsilon: - a = rnd.randint(numActions) + a = rnd.randint(0, numActions) else: qBest = -np.inf for aTest in range(self.numActions): @@ -223,14 +235,14 @@ class sarsaAgent: env = SwingUpEnv() # For simplicity, we only consider forces of -1 and 1 -numActions = 2 +numActions = 5 Actions = np.linspace(-1, 1, numActions) # This is our learning agent gamma = .95 agent = sarsaAgent(5, numActions, 20, 1, epsilon = 5e-2, gamma = gamma, alpha = 1e-5) -maxSteps = 2e5 +maxSteps = 5e4 # This is a helper to deal with the fact that x[2] is actually an angle x_to_y = lambda x : np.array([x[0], x[1], np.cos(x[2]), np.sin(x[2]), x[3]]) @@ -240,36 +252,42 @@ UpTime = [] step = 0 ep = 0 -while step < maxSteps: - ep += 1 - x = env.reset() - C = 0. - - done = False - t = 1 - while not done: - t += 1 - step += 1 - y = x_to_y(x) - a = agent.action(y) - u = Actions[a:a+1] - x_next, c, done, info = env.step(u) +try: + while step < maxSteps: + ep += 1 + x = env.reset(home = ep > 1) + C = 0. - max_up_time = info['max_up_time'] - y_next = x_to_y(x_next) - - C += (1./t) * (c - C) - agent.update(y, a, c, y_next, done) - x = x_next - if done: - break + done = False + t = 1 + while not done: + t += 1 + step += 1 + y = x_to_y(x) + a = agent.action(y) + u = Actions[a:a+1] + x_next, c, done, info = env.step(u) - if step >= maxSteps: - break + max_up_time = info['max_up_time'] + y_next = x_to_y(x_next) + + C += (1./t) * (c - C) + agent.update(y, a, c, y_next, done) + x = x_next + if done: + break + + if step >= maxSteps: + break + - - R.append(C) - UpTime.append(max_up_time) - #print('t:',ep+1,', R:',C,', L:',t-1,', G:',G,', Q:', Q_est, 'U:', max_up_time) - print('Episode:',ep, 'Total Steps:',step, ', Ave. Reward:',C, ', Episode Length:',t-1, 'Max Up-Time:',max_up_time) -env.close() + R.append(C) + UpTime.append(max_up_time) + #print('t:',ep+1,', R:',C,', L:',t-1,', G:',G,', Q:', Q_est, 'U:', max_up_time) + print('Episode:',ep, 'Total Steps:',step, ', Ave. Reward:',C, ', Episode Length:',t-1, 'Max Up-Time:',max_up_time) +except: + env.end() + exit(-1) +finally: + env.end() + exit(0) \ No newline at end of file diff --git a/System_Python/test_Return_Home.py b/System_Python/test_Return_Home.py new file mode 100644 index 0000000..244f99f --- /dev/null +++ b/System_Python/test_Return_Home.py @@ -0,0 +1,16 @@ +from system import System +from time import sleep + +# Main program +sys = System() +sys.initialize() + +ang,lin = sys.measure() +print("Starting position before moving: " + str(lin)) +sys.adjust(4) +ang,lin = sys.measure() +sleep(0.01) +while lin < 10: + ang,lin = sys.measure() + sleep(0.01) +sys.return_home() -- cgit v1.2.3