Skip to content

Commit

Permalink
Merge pull request #352 from google-deepmind/deepmind
Browse files Browse the repository at this point in the history
Merge from `deepmind` to `main`.
  • Loading branch information
thowell authored Nov 15, 2024
2 parents b844928 + dff75d8 commit 9bd2037
Show file tree
Hide file tree
Showing 12 changed files with 172 additions and 40 deletions.
82 changes: 67 additions & 15 deletions mjpc/agent.cc
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,19 @@ void Agent::Initialize(const mjModel* model) {
planner_threads_ =
std::max(1, NumAvailableHardwareThreads() - 3 - 2 * estimator_threads_);

// differentiable planning model
// by default gradient-based planners use a differentiable model
int gradient_planner = false;
if (planner_ == kGradientPlanner || planner_ == kILQGPlanner ||
planner_ == kILQSPlanner) {
gradient_planner = true;
}
differentiable_ =
GetNumberOrDefault(gradient_planner, model, "agent_differentiable");
jnt_solimp_.resize(model->njnt);
geom_solimp_.resize(model->ngeom);
pair_solimp_.resize(model->npair);

// delete the previous model after all the planners have been updated to use
// the new one.
if (old_model) {
Expand Down Expand Up @@ -279,6 +292,22 @@ void Agent::PlanIteration(ThreadPool* pool) {
steps_ =
mju_max(mju_min(horizon_ / timestep_ + 1, kMaxTrajectoryHorizon), 1);

// make model differentiable
int differentiable = differentiable_;
if (differentiable) {
// cache solimp defaults
for (int i = 0; i < model_->njnt; i++) {
jnt_solimp_[i] = model_->jnt_solimp[mjNIMP * i];
}
for (int i = 0; i < model_->ngeom; i++) {
geom_solimp_[i] = model_->geom_solimp[mjNIMP * i];
}
for (int i = 0; i < model_->npair; i++) {
pair_solimp_[i] = model_->pair_solimp[mjNIMP * i];
}
MakeDifferentiable(model_);
}

// plan
if (!allocate_enabled) {
// set state
Expand Down Expand Up @@ -312,6 +341,19 @@ void Agent::PlanIteration(ThreadPool* pool) {
// release the planning residual function
residual_fn_.reset();
}

// restore solimp defaults
if (differentiable) {
for (int i = 0; i < model_->njnt; i++) {
model_->jnt_solimp[mjNIMP * i] = jnt_solimp_[i];
}
for (int i = 0; i < model_->ngeom; i++) {
model_->geom_solimp[mjNIMP * i] = geom_solimp_[i];
}
for (int i = 0; i < model_->npair; i++) {
model_->pair_solimp[mjNIMP * i] = pair_solimp_[i];
}
}
}

// call planner to update nominal policy
Expand Down Expand Up @@ -644,21 +686,23 @@ void Agent::GUI(mjUI& ui) {
}

// ----- agent ----- //
mjuiDef defAgent[] = {{mjITEM_SECTION, "Agent", 1, nullptr, "AP"},
{mjITEM_BUTTON, "Reset", 2, nullptr, " #459"},
{mjITEM_SELECT, "Planner", 2, &planner_, ""},
{mjITEM_SELECT, "Estimator", 2, &estimator_, ""},
{mjITEM_CHECKINT, "Plan", 2, &plan_enabled, ""},
{mjITEM_CHECKINT, "Action", 2, &action_enabled, ""},
{mjITEM_CHECKINT, "Plots", 2, &plot_enabled, ""},
{mjITEM_CHECKINT, "Traces", 2, &visualize_enabled, ""},
{mjITEM_SEPARATOR, "Agent Settings", 1},
{mjITEM_SLIDERNUM, "Horizon", 2, &horizon_, "0 1"},
{mjITEM_SLIDERNUM, "Timestep", 2, &timestep_, "0 1"},
{mjITEM_SELECT, "Integrator", 2, &integrator_,
"Euler\nRK4\nImplicit\nImplicitFast"},
{mjITEM_SEPARATOR, "Planner Settings", 1},
{mjITEM_END}};
mjuiDef defAgent[] = {
{mjITEM_SECTION, "Agent", 1, nullptr, "AP"},
{mjITEM_BUTTON, "Reset", 2, nullptr, " #459"},
{mjITEM_SELECT, "Planner", 2, &planner_, ""},
{mjITEM_SELECT, "Estimator", 2, &estimator_, ""},
{mjITEM_CHECKINT, "Plan", 2, &plan_enabled, ""},
{mjITEM_CHECKINT, "Action", 2, &action_enabled, ""},
{mjITEM_CHECKINT, "Plots", 2, &plot_enabled, ""},
{mjITEM_CHECKINT, "Traces", 2, &visualize_enabled, ""},
{mjITEM_SEPARATOR, "Agent Settings", 1},
{mjITEM_SLIDERNUM, "Horizon", 2, &horizon_, "0 1"},
{mjITEM_SLIDERNUM, "Timestep", 2, &timestep_, "0 1"},
{mjITEM_SELECT, "Integrator", 2, &integrator_,
"Euler\nRK4\nImplicit\nImplicitFast"},
{mjITEM_CHECKINT, "Differentiable", 2, &differentiable_, ""},
{mjITEM_SEPARATOR, "Planner Settings", 1},
{mjITEM_END}};

// planner names
mju::strcpy_arr(defAgent[2].other, planner_names_);
Expand Down Expand Up @@ -730,6 +774,14 @@ void Agent::AgentEvent(mjuiItem* it, mjData* data,
this->PlotInitialize();
this->PlotReset();

// by default gradient-based planners use a differentiable model
if (planner_ == kGradientPlanner || planner_ == kILQGPlanner ||
planner_ == kILQSPlanner) {
differentiable_ = true;
} else {
differentiable_ = false;
}

// reset agent
uiloadrequest.fetch_sub(1);
}
Expand Down
6 changes: 6 additions & 0 deletions mjpc/agent.h
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,12 @@ class Agent {

// max threads for estimation
int estimator_threads_;

// differentiable planning model
bool differentiable_;
std::vector<double> jnt_solimp_;
std::vector<double> geom_solimp_;
std::vector<double> pair_solimp_;
};

} // namespace mjpc
Expand Down
44 changes: 30 additions & 14 deletions mjpc/planners/cross_entropy/planner.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <algorithm>
#include <chrono>
#include <cmath>
#include <mutex>
#include <shared_mutex>

#include <absl/random/random.h>
Expand Down Expand Up @@ -54,8 +55,11 @@ void CrossEntropyPlanner::Initialize(mjModel* model, const Task& task) {
// sampling noise
std_initial_ =
GetNumberOrDefault(0.1, model,
"sampling_exploration"); // initial variance
std_min_ = GetNumberOrDefault(0.1, model, "std_min"); // minimum variance
"sampling_exploration"); // initial variance
std_min_ = GetNumberOrDefault(0.01, model, "std_min"); // minimum variance
// fraction of the trajectories that will use full exploration noise
explore_fraction_ =
GetNumberOrDefault(0.0, model, "explore_fraction");

// set number of trajectories to rollout
num_trajectory_ = GetNumberOrDefault(10, model, "sampling_trajectories");
Expand Down Expand Up @@ -227,12 +231,13 @@ void CrossEntropyPlanner::OptimizePolicy(int horizon, ThreadPool& pool) {
for (int i = 0; i < n_elite; i++) {
// ordered trajectory index
int idx = trajectory_order[i];
const TimeSpline& elite_plan = candidate_policy[idx].plan;

// add parameters
for (int i = 0; i < num_spline_points; i++) {
TimeSpline::Node n = candidate_policy[idx].plan.NodeAt(i);
for (int t = 0; t < num_spline_points; t++) {
TimeSpline::ConstNode n = elite_plan.NodeAt(t);
for (int j = 0; j < model->nu; j++) {
parameters_scratch[i * model->nu + j] += n.values()[j];
parameters_scratch[t * model->nu + j] += n.values()[j];
}
}

Expand All @@ -247,12 +252,15 @@ void CrossEntropyPlanner::OptimizePolicy(int horizon, ThreadPool& pool) {

// loop over elites to compute variance
std::fill(variance.begin(), variance.end(), 0.0); // reset variance to zero
for (int t = 0; t < num_spline_points; t++) {
TimeSpline::Node n = candidate_policy[trajectory_order[0]].plan.NodeAt(t);
for (int j = 0; j < model->nu; j++) {
// average
double p_avg = parameters_scratch[t * model->nu + j];
for (int i = 0; i < n_elite; i++) {
for (int i = 0; i < n_elite; i++) {
int idx = trajectory_order[i];
const TimeSpline& elite_plan = candidate_policy[idx].plan;
for (int t = 0; t < num_spline_points; t++) {
TimeSpline::ConstNode n = elite_plan.NodeAt(t);
for (int j = 0; j < model->nu; j++) {
// average
double p_avg = parameters_scratch[t * model->nu + j];

// candidate parameter
double pi = n.values()[j];
double diff = pi - p_avg;
Expand All @@ -263,7 +271,7 @@ void CrossEntropyPlanner::OptimizePolicy(int horizon, ThreadPool& pool) {

// update
{
const std::shared_lock<std::shared_mutex> lock(mtx_);
const std::unique_lock<std::shared_mutex> lock(mtx_);
policy.plan.Clear();
policy.plan.SetInterpolation(interpolation_);
for (int t = 0; t < num_spline_points; t++) {
Expand Down Expand Up @@ -384,14 +392,21 @@ void CrossEntropyPlanner::Rollouts(int num_trajectory, int horizon,

// lock std_min
double std_min = std_min_;
double std_initial = std_initial_;

// random search
int count_before = pool.GetCount();
for (int i = 0; i < num_trajectory; i++) {
double std;
if (i < num_trajectory * explore_fraction_) {
std = std_initial;
} else {
std = std_min;
}
pool.Schedule([&s = *this, &model = this->model, &task = this->task,
&state = this->state, &time = this->time,
&mocap = this->mocap, &userdata = this->userdata, horizon,
std_min, i]() {
std, i]() {
// copy nominal policy and sample noise
{
const std::shared_lock<std::shared_mutex> lock(s.mtx_);
Expand All @@ -401,7 +416,7 @@ void CrossEntropyPlanner::Rollouts(int num_trajectory, int horizon,
s.resampled_policy.plan.Interpolation());

// sample noise
s.AddNoiseToPolicy(i, std_min);
s.AddNoiseToPolicy(i, std);
}

// ----- rollout sample policy ----- //
Expand Down Expand Up @@ -486,6 +501,7 @@ void CrossEntropyPlanner::GUI(mjUI& ui) {
{mjITEM_SLIDERINT, "Spline Pts", 2, &policy.num_spline_points, "0 1"},
{mjITEM_SLIDERNUM, "Init. Std", 2, &std_initial_, "0 1"},
{mjITEM_SLIDERNUM, "Min. Std", 2, &std_min_, "0.01 0.5"},
{mjITEM_SLIDERNUM, "Explore", 2, &explore_fraction_, "0.0 1.0"},
{mjITEM_SLIDERINT, "Elite", 2, &n_elite_, "2 128"},
{mjITEM_END}};

Expand Down
2 changes: 2 additions & 0 deletions mjpc/planners/cross_entropy/planner.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,8 @@ class CrossEntropyPlanner : public Planner {
double std_initial_; // standard deviation for sampling normal: N(0,
// std)
double std_min_; // the minimum allowable std
double explore_fraction_ = 0; // fraction of trajectories that will use
// std_initial instead of the variance from CEM
std::vector<double> noise;
std::vector<double> variance;

Expand Down
14 changes: 13 additions & 1 deletion mjpc/planners/ilqg/planner.cc
Original file line number Diff line number Diff line change
Expand Up @@ -584,7 +584,19 @@ void iLQGPlanner::Iteration(int horizon, ThreadPool& pool) {
std::cout << " dV: " << expected << '\n';
std::cout << " dV[0]: " << backward_pass.dV[0] << '\n';
std::cout << " dV[1]: " << backward_pass.dV[1] << '\n';
std::cout << std::endl;

std::cout << "\niLQG Timing (ms)\n" << '\n';
std::cout << " nominal: " << nominal_compute_time * 1.0e-3 << '\n';
std::cout << " model derivative: "
<< model_derivative_compute_time * 1.0e-3 << '\n';
std::cout << " cost derivative: " << cost_derivative_compute_time * 1.0e-3
<< '\n';
std::cout << " backward pass: " << backward_pass_compute_time * 1.0e-3
<< '\n';
std::cout << " rollouts: " << rollouts_compute_time * 1.0e-3 << '\n';
std::cout << " policy update: " << policy_update_compute_time * 1.0e-3
<< '\n';
std::cout << "\n\n";
}

// stop timer
Expand Down
11 changes: 11 additions & 0 deletions mjpc/planners/include.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,17 @@

namespace mjpc {

// planner types
enum PlannerType : int {
kSamplingPlanner = 0,
kGradientPlanner,
kILQGPlanner,
kILQSPlanner,
kRobustPlanner,
kCrossEntropyPlanner,
kSampleGradientPlanner,
};

// Planner names, separated by '\n'.
extern const char kPlannerNames[];

Expand Down
22 changes: 14 additions & 8 deletions mjpc/tasks/quadruped/quadruped.cc
Original file line number Diff line number Diff line change
Expand Up @@ -182,16 +182,21 @@ void QuadrupedFlat::ResidualFn::Residual(const mjModel* model,
if (current_mode_ == kModeBiped) {
// loosen the "hands" in Biped mode
bool handstand = ReinterpretAsInt(parameters_[biped_type_param_id_]);
double arm_posture = parameters_[arm_posture_param_id_];
if (handstand) {
residual[counter + 4] *= 0.03;
residual[counter + 5] *= 0.03;
residual[counter + 10] *= 0.03;
residual[counter + 11] *= 0.03;
residual[counter + 6] *= arm_posture;
residual[counter + 7] *= arm_posture;
residual[counter + 8] *= arm_posture;
residual[counter + 9] *= arm_posture;
residual[counter + 10] *= arm_posture;
residual[counter + 11] *= arm_posture;
} else {
residual[counter + 1] *= 0.03;
residual[counter + 2] *= 0.03;
residual[counter + 7] *= 0.03;
residual[counter + 8] *= 0.03;
residual[counter + 0] *= arm_posture;
residual[counter + 1] *= arm_posture;
residual[counter + 2] *= arm_posture;
residual[counter + 3] *= arm_posture;
residual[counter + 4] *= arm_posture;
residual[counter + 5] *= arm_posture;
}
}
counter += model->nu;
Expand Down Expand Up @@ -521,6 +526,7 @@ void QuadrupedFlat::ResetLocked(const mjModel* model) {
residual_.cadence_param_id_ = ParameterIndex(model, "Cadence");
residual_.amplitude_param_id_ = ParameterIndex(model, "Amplitude");
residual_.duty_param_id_ = ParameterIndex(model, "Duty ratio");
residual_.arm_posture_param_id_ = ParameterIndex(model, "Arm posture");
residual_.balance_cost_id_ = CostTermByName(model, "Balance");
residual_.upright_cost_id_ = CostTermByName(model, "Upright");
residual_.height_cost_id_ = CostTermByName(model, "Height");
Expand Down
1 change: 1 addition & 0 deletions mjpc/tasks/quadruped/quadruped.h
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,7 @@ class QuadrupedFlat : public Task {
int cadence_param_id_ = -1;
int amplitude_param_id_ = -1;
int duty_param_id_ = -1;
int arm_posture_param_id_ = -1;
int upright_cost_id_ = -1;
int balance_cost_id_ = -1;
int height_cost_id_ = -1;
Expand Down
1 change: 1 addition & 0 deletions mjpc/tasks/quadruped/task_flat.xml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
<numeric name="residual_select_Biped type" data="0"/>
<text name="residual_list_Biped type" data="Foot Stand|Hand Stand"/>
<numeric name="residual_Heading" data="0 -3.14 3.14" />
<numeric name="residual_Arm posture" data=".03 0 1"/>

<!-- estimator -->
<numeric name="estimator" data="0" />
Expand Down
8 changes: 6 additions & 2 deletions mjpc/tasks/shadow_reorient/task.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,17 @@
<size memory="1M"/>

<custom>
<numeric name="agent_planner" data="0" />
<numeric name="agent_planner" data="5" />
<numeric name="agent_horizon" data="0.25" />
<numeric name="agent_timestep" data="0.01" />
<numeric name="agent_policy_width" data="0.0035" />
<numeric name="sampling_spline_points" data="5" />
<numeric name="sampling_exploration" data="0.1" />
<numeric name="sampling_exploration" data="0.2" />
<numeric name="sampling_representation" data="0" />
<numeric name="sampling_trajectories" data="60" />
<numeric name="n_elite" data="8" />
<numeric name="explore_fraction" data="0.5" />

<numeric name="robust_xfrc" data="0.004" />
</custom>

Expand Down
18 changes: 18 additions & 0 deletions mjpc/utilities.cc
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,24 @@ extern "C" {
}

namespace mjpc {
// make model differentiable by setting solimp[0] to zero
void MakeDifferentiable(mjModel* model) {
// joints
for (int i = 0; i < model->njnt; i++) {
model->jnt_solimp[mjNIMP * i] = 0.0;
}

// geoms
for (int i = 0; i < model->ngeom; i++) {
model->geom_solimp[mjNIMP * i] = 0.0;
}

// contact pairs
for (int i = 0; i < model->npair; i++) {
model->pair_solimp[mjNIMP * i] = 0.0;
}
}

// set mjData state
void SetState(const mjModel* model, mjData* data, const double* state) {
mju_copy(data->qpos, state, model->nq);
Expand Down
3 changes: 3 additions & 0 deletions mjpc/utilities.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ namespace mjpc {
// maximum number of traces that are visualized
inline constexpr int kMaxTraces = 99;

// make model differentiable by setting solimp[0] to zero
void MakeDifferentiable(mjModel* model);

// set mjData state
void SetState(const mjModel* model, mjData* data, const double* state);

Expand Down

0 comments on commit 9bd2037

Please sign in to comment.