Hi! I've been learning the DDP method recently, and I've upvoted your brilliant implementation. It seems like you are using the MPC version of iLQR? I changed it into the normal version, but it no longer converges... Could you please explain a little bit for me? (See the toy sketch below for what I mean by the two versions.)
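To be concrete about what I mean by the two versions, here is a toy sketch of the two control-loop structures. It uses a dummy 1D linear system, and `plan()` is a hypothetical stand-in for a full iLQR solve, not your repo's actual API:

```python
def step(x, u):
    # toy stable 1D dynamics, just to show the loop structure
    return 0.9 * x + u

def plan(x0, horizon):
    # hypothetical stand-in for a full iLQR solve started from state x0
    return [-0.5 * x0 for _ in range(horizon)]

# MPC version: re-plan at every step, apply only the first control
x = 5.0
for _ in range(20):
    u_seq = plan(x, horizon=10)
    x = step(x, u_seq[0])

# normal (batch) version: plan once over the whole horizon, then execute it
x = 5.0
for u in plan(x, horizon=20):
    x = step(x, u)
```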
But anyway, thanks for your implementation!
The following is my code, modified from yours:
```python
import gym
import env
from autograd import grad, jacobian
import autograd.numpy as np


class ILqr:
    def __init__(self, next_state, running_cost, final_cost,
                 umax, state_dim, pred_time=500):
        self.pred_time = pred_time
        self.umax = umax
        # value function V, its gradient V_x, and Hessian V_xx over the horizon
        self.v = [0.0 for _ in range(pred_time + 1)]
        self.v_x = [np.zeros(state_dim) for _ in range(pred_time + 1)]
        self.v_xx = [np.zeros((state_dim, state_dim)) for _ in range(pred_time + 1)]
        self.f = next_state
        self.lf = final_cost
        # derivatives of the costs and dynamics via autograd
        self.lf_x = grad(self.lf)
        self.lf_xx = jacobian(self.lf_x)
        self.l_x = grad(running_cost, 0)
        self.l_u = grad(running_cost, 1)
        self.l_xx = jacobian(self.l_x, 0)
        self.l_uu = jacobian(self.l_u, 1)
        self.l_ux = jacobian(self.l_u, 0)
        self.f_x = jacobian(self.f, 0)
        self.f_u = jacobian(self.f, 1)
    def backward(self, x_seq, u_seq):
        # boundary condition: terminal value function and its derivatives
        self.v[-1] = self.lf(x_seq[-1])
        self.v_x[-1] = self.lf_x(x_seq[-1])
        self.v_xx[-1] = self.lf_xx(x_seq[-1])
        k_seq = []
        kk_seq = []
        for t in range(self.pred_time - 1, -1, -1):
            f_x_t = self.f_x(x_seq[t], u_seq[t])
            f_u_t = self.f_u(x_seq[t], u_seq[t])
            # Q-function derivatives of the standard iLQR recursion
            q_x = self.l_x(x_seq[t], u_seq[t]) + np.matmul(f_x_t.T, self.v_x[t + 1])
            q_u = self.l_u(x_seq[t], u_seq[t]) + np.matmul(f_u_t.T, self.v_x[t + 1])
            q_xx = self.l_xx(x_seq[t], u_seq[t]) + \
                np.matmul(np.matmul(f_x_t.T, self.v_xx[t + 1]), f_x_t)
            tmp = np.matmul(f_u_t.T, self.v_xx[t + 1])
            q_uu = self.l_uu(x_seq[t], u_seq[t]) + np.matmul(tmp, f_u_t)
            q_ux = self.l_ux(x_seq[t], u_seq[t]) + np.matmul(tmp, f_x_t)
            inv_q_uu = np.linalg.inv(q_uu)
            k = -np.matmul(inv_q_uu, q_u)    # feedforward gain
            kk = -np.matmul(inv_q_uu, q_ux)  # feedback gain
            dv = 0.5 * np.matmul(q_u, k)
            self.v[t] += dv
            self.v_x[t] = q_x - np.matmul(np.matmul(q_u, inv_q_uu), q_ux)
            self.v_xx[t] = q_xx + np.matmul(q_ux.T, kk)
            k_seq.append(k)
            kk_seq.append(kk)
        k_seq.reverse()
        kk_seq.reverse()
        return k_seq, kk_seq
    def forward(self, x_seq, u_seq, k_seq, kk_seq):
        x_seq_hat = np.array(x_seq)
        u_seq_hat = np.array(u_seq)
        for t in range(len(u_seq)):
            # x_seq_hat is never re-simulated in this loop, so this deviation
            # stays zero and only the feedforward term k contributes
            control = k_seq[t] + np.matmul(kk_seq[t], (x_seq_hat[t] - x_seq[t]))
            u_seq_hat[t] = np.clip(u_seq[t] + control, -self.umax, self.umax)
        return u_seq_hat

env = gym.make('CartPoleContinuous-v0').env
obs = env.reset()
ilqr = ILqr(lambda x, u: env._state_eq(x, u),  # dynamics model
            lambda x, u: 0.5 * np.sum(np.square(u)),  # running cost
            lambda x: 0.5 * (np.square(1.0 - np.cos(x[2]))
                             + np.square(x[1]) + np.square(x[3])),  # final cost
            env.max_force,
            env.observation_space.shape[0])

# initial nominal trajectory: zero controls rolled out through the model
u_seq = [np.zeros(1) for _ in range(ilqr.pred_time)]
x_seq = [obs.copy()]
for t in range(ilqr.pred_time):
    x_seq.append(env._state_eq(x_seq[-1], u_seq[t]))
cnt = 0
while True:
    env.reset()
    # one iLQR iteration over the full horizon (no receding horizon / MPC)
    k_seq, kk_seq = ilqr.backward(x_seq, u_seq)
    u_seq = ilqr.forward(x_seq, u_seq, k_seq, kk_seq)
    # roll out the new control sequence in the environment and measure its cost
    cost = 0
    x_seq = [env.reset()]
    for t in range(ilqr.pred_time):
        obs, _, _, _ = env.step(u_seq[t])
        x_seq.append(obs)
        cost = cost + 0.5 * np.sum(np.square(u_seq[t]))
    x = x_seq[-1]
    cost = cost + 0.5 * (np.square(1.0 - np.cos(x[2]))
                         + np.square(x[1]) + np.square(x[3]))
    cnt += 1
    print(cost)
    if cost < 15:
        break
```
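One thing I noticed while debugging: in my forward() above, x_seq_hat is copied from x_seq and never updated, so (x_seq_hat[t] - x_seq[t]) is always zero and the kk feedback gains do nothing. My understanding is that the usual non-MPC iLQR forward pass re-simulates the trajectory with the dynamics model inside the loop, roughly like the sketch below (same names as my class; please correct me if this isn't how your original version works):

```python
    def forward(self, x_seq, u_seq, k_seq, kk_seq):
        x_seq_hat = np.array(x_seq)
        u_seq_hat = np.array(u_seq)
        for t in range(len(u_seq)):
            # deviation of the re-simulated state from the nominal trajectory
            control = k_seq[t] + np.matmul(kk_seq[t], (x_seq_hat[t] - x_seq[t]))
            u_seq_hat[t] = np.clip(u_seq[t] + control, -self.umax, self.umax)
            # roll the state forward with the model so the feedback term
            # acts on a nonzero deviation at the next step
            x_seq_hat[t + 1] = self.f(x_seq_hat[t], u_seq_hat[t])
        return x_seq_hat, u_seq_hat
```

Is dropping that rollout what breaks convergence, or is it the MPC-vs-batch difference itself?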