Skip to content

Commit

Permalink
gpu rollouts on correct format
Browse files Browse the repository at this point in the history
  • Loading branch information
larsvens committed Feb 14, 2020
1 parent dd2a3ca commit 6c8d3dc
Show file tree
Hide file tree
Showing 4 changed files with 209 additions and 26 deletions.
4 changes: 2 additions & 2 deletions common/config/reduced_mu_turn_nonadapt_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ t_activate: 15.0
t_final: 500.0
scenario_id: 2 # 1: pop-up, 2: reduced mu turn, 3: racing
s_begin_log: 180
s_pause_gazebo: []
#s_pause_gazebo: [180, 195, 210, 225, 240] # snapshots
#s_pause_gazebo: []
s_pause_gazebo: [180, 195, 210, 225, 240] # snapshots

# pop-up obstacle config
s_ego_at_popup: 50
Expand Down
212 changes: 191 additions & 21 deletions saarti/src/cuda_wrapper.cu
Original file line number Diff line number Diff line change
Expand Up @@ -11,22 +11,31 @@ __global__ void single_rollout(float *trajset_arr,
float *x_init,
uint Npath,
float *s_path,
float *vxref_path,
float *kappac_path,
float *mu_path,
float mu_nominal,
float m,
float Iz,
float lf,
float lr,
float h_cg,
float g,
uint Nt,
uint Ni,
uint Nx,
uint Nu,
uint Ntrajs,
uint Npp,
float dt)
float dt,
int traction_adaptive,
int ref_mode,
float vxref_nominal,
float *d_ref)
{
// notes on indexing:
// trajset_arr:
// row (i) <- state (Nx+Nu)
// row (i) <- state (Nx+Nu+Nmisc)
// column (j) <- traj (Ntrajs)
// page (k) <- time (Nt)
// iii - index of the 1d array that represents the 3d array (increase order: rows - columns - pages)
Expand All @@ -49,23 +58,147 @@ __global__ void single_rollout(float *trajset_arr,
trajset_arr[threadIdx.x + 5*Ntrajs + 0*Npp] = vy;

// rollout loop
for (int ki=0; ki<(Nt*Ni)-1; ki++){
float Fyf = 0.0f;
float Fxf = 0.0f;
float Fxr = 0.0f;
uint k = 0; // k is the regular iterator, ki is upsampled
for (int ki=0; ki<((Nt+1)*Ni); ki++){

// get kappac at s (todo!)
for (int id= 0; id<Npath; ++id) {
// break if spath - s is positive, save index
// get kappac at s
int path_idx;
for (path_idx = 0; path_idx<Npath; path_idx++) {
// break if s - spath is negative, save index
if(s-s_path[path_idx] <= 0){
break;
}
}
float kappac = 0;
float kappac = kappac_path[path_idx];

// set control input
float Fyf = 10000*((float(threadIdx.x)/float(Ntrajs))-0.5f);
float Fxf = 500;
float Fxr = 500;
// set ax for Fz computation from previous cmd
float ax = (Fxf+Fxr)/m;

// get normal forces front and back and mu
float Fzf;
float Fzr;
float mu;
if(traction_adaptive == 1){
float theta = 0; // grade angle todo get from pathlocal via traj
Fzf = (1.0f/(lf+lr))*(m*ax*h_cg - m*g*h_cg*sin(theta) + m*g*lr*cos(theta));
Fzr = (1.0f/(lf+lr))*(-m*ax*h_cg + m*g*h_cg*sin(theta) + m*g*lf*cos(theta));
mu = mu_path[path_idx]; // get mu from pathlocal at s
} else { // (traction_adaptive == 0)
Fzf = (1.0f/(lf+lr))*(m*g*lr);
Fzr = (1.0f/(lf+lr))*(m*g*lf);
mu = mu_nominal;
}

// get rear cornering stiffness
float B, C, D; //, E;
// todo add case for racing
if(0.0f <= mu && mu <0.3f){ // ice
B = 4.0f;
C = 2.0f;
D = mu;
//E = 1.0f;
} else if (0.3f <= mu && mu <0.5f) { // snow
B = 5.0f;
C = 2.0f;
D = mu;
//E = 1.0f;
} else if (0.5f <= mu && mu <0.9f) { // wet
B = 12.0f;
C = 2.3f;
D = mu;
//E = 1.0f;
} else if (0.9f <= mu && mu <2.5f) { // dry
B = 10.0f;
C = 1.9f;
D = mu;
//E = 0.97f;
} else {
// todo Error = nonzero nr - check at host and throw error
}
float Cr = B*C*D*Fzr; // Rajamani

// set Fyr
float Cr = 200000; // todo get dynamically!
float Fyr = 2*Cr*atan(lr*psidot-vy)/vx; // help variable

// compute maximum tire forces
float Ffmax = mu*Fzf;
float Frmax = mu*Fzr;

// get local vxref and vxerror
float vxref;
if(ref_mode == 1){ // cc
vxref = vxref_nominal;
} else if(ref_mode == 2) { // max s
vxref = vxref_path[path_idx];
} else {
vxref = 0; // minimize s (refmode 0)
}
float vxerror = vxref-vx; // todo use blockIdx.x here

// get derror (one per thread)
float derror = d_ref[threadIdx.x] - d;

/*
* ROLLOUT CONTROLLER
*/

// TODO CHECK THAT WEE GET SIMILAR OUTPUT FROM EULER AND RK4
// TODO GET PARAMS FOR STARNDARD STATE FEEDBACK

// select Fyf
float feedfwd = 0.5f*m*vx*vx*kappac*cos(deltapsi);
float feedback = 3000*derror - 500*deltapsi;
Fyf = feedfwd + feedback;

// saturate Fyf at Ffmax
if(Fyf >= Ffmax){
Fyf = Ffmax;
}
if(Fyf<=-Ffmax){
Fyf = -Ffmax;
}

// select Fxf
float Fxfmax = sqrt(Ffmax*Ffmax-Fyf*Fyf);
if (ref_mode == 0){ // minimize s
// select max negative Fxf until stop
Fxf = -Fxfmax;
} else if (ref_mode == 1 || ref_mode == 2){ // maximize s or cc
if(vxerror > 0){ // accelerating
Fxf = 0; // rear wheel drive - no drive on front wheel
} else { // braking
Fxf = 1000*vxerror;
// saturate
if(Fxf<=-Fxfmax){
Fxf = -Fxfmax;
}
}
}

// select Fxr
//float Fxrmax = mu*Fzr;
float Fxrmax = sqrt(Frmax*Frmax-Fyr*Fyr);
if (ref_mode == 0){ // minimize s
Fxr = -Fxrmax;
} else if (ref_mode == 1 || ref_mode == 2){ // maximize s or cc
Fxr = 1000*vxerror;
// saturate
if(Fxr >= Fxrmax){
Fxr = Fxrmax;
}
if(Fxr<=-Fxrmax){
Fxr = -Fxrmax;
}
}

// old
// Fyf = 10000*((float(threadIdx.x)/float(Ntrajs))-0.5f);
// Fxf = 500;
// Fxr = 500;

// euler fwd step
s = s + (dt/Ni)*((vx*cos(deltapsi)-vy*sin(deltapsi))/(1-d*kappac));
d = d + (dt/Ni)*(vx*sin(deltapsi)+vy*cos(deltapsi));
Expand All @@ -75,7 +208,7 @@ __global__ void single_rollout(float *trajset_arr,
vy = vy + (dt/Ni)*((1/m)*(Fyf+Fyr)-vx*psidot);

if(ki % Ni == 0){
uint k = ki/Ni;
k = ki/Ni;
// set x at k+1
trajset_arr[threadIdx.x + 0*Ntrajs + (k+1)*Npp] = s;
trajset_arr[threadIdx.x + 1*Ntrajs + (k+1)*Npp] = d;
Expand All @@ -87,6 +220,12 @@ __global__ void single_rollout(float *trajset_arr,
trajset_arr[threadIdx.x + 6*Ntrajs + (k)*Npp] = Fyf;
trajset_arr[threadIdx.x + 7*Ntrajs + (k)*Npp] = Fxf;
trajset_arr[threadIdx.x + 8*Ntrajs + (k)*Npp] = Fxr;
// set miscvars
trajset_arr[threadIdx.x + 9*Ntrajs + (k)*Npp] = Fzf;
trajset_arr[threadIdx.x + 10*Ntrajs + (k)*Npp] = Fzr;
trajset_arr[threadIdx.x + 11*Ntrajs + (k)*Npp] = kappac;
trajset_arr[threadIdx.x + 12*Ntrajs + (k)*Npp] = mu;
trajset_arr[threadIdx.x + 13*Ntrajs + (k)*Npp] = Cr;
}
}
}
Expand All @@ -98,6 +237,8 @@ void cuda_rollout(std::vector<containers::trajstruct> &trajset_struct,
containers::staticparamstruct sp,
int traction_adaptive,
float mu_nominal,
containers::refstruct refs,
float vxref_nominal,
uint Nt, // N in planning horizon
uint Ni, // scaling factor in integration
uint Ntrajs, // Nr of trajs to roll out TODO replace w. Nd and Nvx
Expand All @@ -107,23 +248,26 @@ void cuda_rollout(std::vector<containers::trajstruct> &trajset_struct,
// init variables
uint Nx = 6;
uint Nu = 3;
uint Npp = (Nx+Nu)*Ntrajs; // elements_per_page
uint Nmisc = 5; // nr of additional traj vars
uint Npp = (Nx+Nu+Nmisc)*Ntrajs; // elements_per_page
float *trajset_arr;
float *x_init;
float *d_ref;

uint Npath = pathlocal.s.size();
float *s_path;
float *vxref_path;
float *kappac_path;
//float *mu_path;
float *mu_path;

// allocate shared memory
cudaMallocManaged(&trajset_arr, (Nx+Nu)*Ntrajs*Nt*sizeof(float));
cudaMallocManaged(&trajset_arr, (Nx+Nu+Nmisc)*Ntrajs*Nt*sizeof(float));
cudaMallocManaged(&x_init, Nx*sizeof(float));
cudaMallocManaged(&d_ref, Ntrajs*sizeof(float));
cudaMallocManaged(&s_path, Npath*sizeof(float));
cudaMallocManaged(&vxref_path, Npath*sizeof(float));
cudaMallocManaged(&kappac_path, Npath*sizeof(float));
//cudaMallocManaged(&mu_path, Npath*sizeof(float));
cudaMallocManaged(&mu_path, Npath*sizeof(float));

// set init state
x_init[0] = initstate.s;
Expand All @@ -137,12 +281,12 @@ void cuda_rollout(std::vector<containers::trajstruct> &trajset_struct,
float dlb = pathlocal.dlb.at(0);
float dub = pathlocal.dub.at(0);
float dstep = (dub-dlb)/Ntrajs;
for(int id=0; id<Ntrajs; ++id) {
for(uint id=0; id<Ntrajs; ++id) {
d_ref[id] = dlb+id*dstep;
}

// set path variables
for(int id=0; id<Npath; ++id) {
for(uint id=0; id<Npath; ++id) {
s_path[id] = pathlocal.s.at(id);
kappac_path[id] = pathlocal.kappa_c.at(id);
//mu_path[id] = pathlocal.mu.at(id);
Expand All @@ -153,18 +297,27 @@ void cuda_rollout(std::vector<containers::trajstruct> &trajset_struct,
x_init,
Npath,
s_path,
vxref_path,
kappac_path,
mu_path,
mu_nominal,
sp.m,
sp.Iz,
sp.lf,
sp.lr,
sp.h_cg,
sp.g,
Nt,
Ni,
Nx,
Nu,
Ntrajs,
Npp,
dt);
dt,
traction_adaptive,
refs.ref_mode,
vxref_nominal,
d_ref);

// wait for GPU to finish before accessing on host
cudaDeviceSynchronize();
Expand All @@ -180,10 +333,27 @@ void cuda_rollout(std::vector<containers::trajstruct> &trajset_struct,
traj.psidot.push_back(trajset_arr[j + 3*Ntrajs + k*Npp]);
traj.vx.push_back(trajset_arr[j + 4*Ntrajs + k*Npp]);
traj.vy.push_back(trajset_arr[j + 5*Ntrajs + k*Npp]);
if(k<Nt){ // N+1 states and N ctrls
traj.Fyf.push_back(trajset_arr[j + 6*Ntrajs + k*Npp]);
traj.Fxf.push_back(trajset_arr[j + 7*Ntrajs + k*Npp]);
traj.Fxr.push_back(trajset_arr[j + 8*Ntrajs + k*Npp]);
// miscvars
traj.Fzf.push_back(trajset_arr[j + 9*Ntrajs + k*Npp]);
traj.Fzr.push_back(trajset_arr[j + 10*Ntrajs + k*Npp]);
traj.kappac.push_back(trajset_arr[j + 11*Ntrajs + k*Npp]);
traj.mu.push_back(trajset_arr[j + 12*Ntrajs + k*Npp]);
traj.Cr.push_back(trajset_arr[j + 13*Ntrajs + k*Npp]);
}
}
// add last element of misc vars
traj.Fzf.push_back(traj.Fzf.back());
traj.Fzr.push_back(traj.Fzr.back());
traj.kappac.push_back(traj.kappac.back());
traj.mu.push_back(traj.mu.back());
traj.Cr.push_back(traj.Cr.back());
trajset_struct.push_back(traj);
}
std::cout << "reached end of cuda_rollout" << std::endl;
//std::cout << "reached end of cuda_rollout" << std::endl;

// Free memory
cudaFree(trajset_arr);
Expand Down
2 changes: 1 addition & 1 deletion saarti/src/rtisqp_wrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -567,7 +567,7 @@ void RtisqpWrapper::computeTrajset(vector<containers::trajstruct> &trajset,
if(k==0){
traj.ax.push_back(0);
} else {
traj.ax.push_back( (traj.Fxf.at(k-1)+traj.Fyf.at(k-1))/sp.m );
traj.ax.push_back( (traj.Fxf.at(k-1)+traj.Fxr.at(k-1))/sp.m );
}

// compute normal forces front and back
Expand Down
17 changes: 15 additions & 2 deletions saarti/src/saarti_node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ void cuda_rollout(std::vector<containers::trajstruct> &trajset_struct,
containers::staticparamstruct sp,
int traction_adaptive,
float mu_nominal,
containers::refstruct refs,
float vxref_nominal,
uint Nt,
uint Ni,
uint Ntrajs,
Expand Down Expand Up @@ -123,11 +125,22 @@ SAARTI::SAARTI(ros::NodeHandle nh){
if(sampling_augmentation_ == 1){
ROS_INFO_STREAM("generating trajectory set");
// cpu rollout
rtisqp_wrapper_.computeTrajset(trajset_,state_,pathlocal_,sp_,traction_adaptive_,mu_nominal_,vxref_cc_,refs_,uint(Ntrajs_rollout_));
//rtisqp_wrapper_.computeTrajset(trajset_,state_,pathlocal_,sp_,traction_adaptive_,mu_nominal_,vxref_cc_,refs_,uint(Ntrajs_rollout_));

// gpu rollout
uint Ni = 10;
cuda_rollout(trajset_, state_, pathlocal_, sp_, traction_adaptive_, mu_nominal_, N, Ni, uint(Ntrajs_rollout_),dt_);
cuda_rollout(trajset_,
state_,
pathlocal_,
sp_,
traction_adaptive_,
mu_nominal_,
refs_,
vxref_cc_,
N,
Ni,
uint(Ntrajs_rollout_),
dt_);

// append trajprime
if (trajstar_last.s.size()!=0){
Expand Down

0 comments on commit 6c8d3dc

Please sign in to comment.