Fix DynamicAutoDiffCostFunction
Changed DynamicAutoDiffCostFunction to handle multiple derivative
sections as opposed to just a single contiguous block.
In the previous implementation it was assumed that non-constant
parameters occur in a single contiguous block so that constant
parameters could NOT lie between non-constant parameters. Previously,
start_derivative_section was first set as soon as the first
non-constant parameter block (marked by jacobians[i] != NULL) was
encountered. After this, entries in input_jets[parameter_cursor].v were
accessed with `parameter_cursor - start_derivative_section`. For
contiguous non-constant parameter blocks this is fine, but if constant
parameter blocks fall between then this indexing is incorrect because
`parameter_cursor - start_derivative_section` can go out of bounds.
For a concrete example, take a cost function with three parameter
blocks, each of size 1 and with the center block fixed. Assume that
Stride=1 so that two passes are required. On the first pass
start_derivative_section=0, and the first variable block is handled
correctly. At the end of the first pass end_derivative_section=1, so
for the second pass start_derivative_section=1. Now comes the problem.
When parameter_cursor=1, parameter_cursor >= start_derivative_section
so jacobian[1] is checked to be NULL. Since it is NULL (second
parameter block is constant) then nothing is done and
active_parameter_count is NOT incremented. Next, when
parameter_cursor=2, parameter_cursor >= start_derivative_section and
jacobian[2] is checked. Since it is not NULL then
input_jets[parameter_cursor].v[parameter_cursor -
start_derivative_section] is set to 1.0, BUT parameter_cursor -
start_derivative_section = 2 - 1 = 1 which is out of bounds
(input_jets[parameter_cursor].v is only of size Stride=1).
The proposed solution records the start of each contiguous block of
non-constant parameters and indexing into
input_jets[parameter_cursor].v is independent of parameter_cursor.
Change-Id: I388ab6a0bafa35d317491135ec6fe980453ff888
diff --git a/include/ceres/dynamic_autodiff_cost_function.h b/include/ceres/dynamic_autodiff_cost_function.h
index 38bdb0a..5d8f188 100644
--- a/include/ceres/dynamic_autodiff_cost_function.h
+++ b/include/ceres/dynamic_autodiff_cost_function.h
@@ -126,17 +126,28 @@
vector<Jet<double, Stride>* > jet_parameters(num_parameter_blocks,
static_cast<Jet<double, Stride>* >(NULL));
int num_active_parameters = 0;
- int start_derivative_section = -1;
- for (int i = 0, parameter_cursor = 0; i < num_parameter_blocks; ++i) {
+
+ // To handle constant parameters between non-constant parameter blocks, the
+ // start position --- a raw parameter index --- of each contiguous block of
+ // non-constant parameters is recorded in start_derivative_section.
+ vector<int> start_derivative_section;
+ bool in_derivative_section = false;
+ int parameter_cursor = 0;
+
+ // Discover the derivative sections and set the parameter values.
+ for (int i = 0; i < num_parameter_blocks; ++i) {
jet_parameters[i] = &input_jets[parameter_cursor];
const int parameter_block_size = parameter_block_sizes()[i];
if (jacobians[i] != NULL) {
- start_derivative_section =
- (start_derivative_section == -1)
- ? parameter_cursor
- : start_derivative_section;
+ if (!in_derivative_section) {
+ start_derivative_section.push_back(parameter_cursor);
+ in_derivative_section = true;
+ }
+
num_active_parameters += parameter_block_size;
+ } else {
+ in_derivative_section = false;
}
for (int j = 0; j < parameter_block_size; ++j, parameter_cursor++) {
@@ -144,29 +155,54 @@
}
}
+ // When `num_active_parameters % Stride != 0` then it can be the case
+ // that `active_parameter_count < Stride` while parameter_cursor is less
+ // than the total number of parameters and with no remaining non-constant
+ // parameter blocks. Pushing parameter_cursor (the total number of
+ // parameters) as a final entry to start_derivative_section is required
+ // because if a constant parameter block is encountered after the
+ // last non-constant block then current_derivative_section is incremented
+ // and would otherwise index an invalid position in
+ // start_derivative_section. Setting the final element to the total number
+ // of parameters means that this can only happen at most once in the loop
+ // below.
+ start_derivative_section.push_back(parameter_cursor);
+
// Evaluate all of the strides. Each stride is a chunk of the derivative to
// evaluate, typically some size proportional to the size of the SIMD
// registers of the CPU.
int num_strides = static_cast<int>(ceil(num_active_parameters /
static_cast<float>(Stride)));
+ int current_derivative_section = 0;
+ int current_derivative_section_cursor = 0;
+
for (int pass = 0; pass < num_strides; ++pass) {
// Set most of the jet components to zero, except for
// non-constant #Stride parameters.
+ const int initial_derivative_section = current_derivative_section;
+ const int initial_derivative_section_cursor =
+ current_derivative_section_cursor;
+
int active_parameter_count = 0;
- int end_derivative_section = start_derivative_section;
- for (int i = 0, parameter_cursor = 0; i < num_parameter_blocks; ++i) {
+ parameter_cursor = 0;
+
+ for (int i = 0; i < num_parameter_blocks; ++i) {
for (int j = 0; j < parameter_block_sizes()[i];
++j, parameter_cursor++) {
input_jets[parameter_cursor].v.setZero();
- if (parameter_cursor >= start_derivative_section &&
- active_parameter_count < Stride) {
+ if (active_parameter_count < Stride &&
+ parameter_cursor >= (
+ start_derivative_section[current_derivative_section] +
+ current_derivative_section_cursor)) {
if (jacobians[i] != NULL) {
- input_jets[parameter_cursor]
- .v[parameter_cursor - start_derivative_section] = 1.0;
+ input_jets[parameter_cursor].v[active_parameter_count] = 1.0;
++active_parameter_count;
+ ++current_derivative_section_cursor;
+ } else {
+ ++current_derivative_section;
+ current_derivative_section_cursor = 0;
}
- ++end_derivative_section;
}
}
}
@@ -177,18 +213,27 @@
// Copy the pieces of the jacobians into their final place.
active_parameter_count = 0;
+
+ current_derivative_section = initial_derivative_section;
+ current_derivative_section_cursor = initial_derivative_section_cursor;
+
for (int i = 0, parameter_cursor = 0; i < num_parameter_blocks; ++i) {
for (int j = 0; j < parameter_block_sizes()[i];
++j, parameter_cursor++) {
- if (parameter_cursor >= start_derivative_section &&
- active_parameter_count < Stride) {
+ if (active_parameter_count < Stride &&
+ parameter_cursor >= (
+ start_derivative_section[current_derivative_section] +
+ current_derivative_section_cursor)) {
if (jacobians[i] != NULL) {
for (int k = 0; k < num_residuals(); ++k) {
jacobians[i][k * parameter_block_sizes()[i] + j] =
- output_jets[k].v[parameter_cursor -
- start_derivative_section];
+ output_jets[k].v[active_parameter_count];
}
++active_parameter_count;
+ ++current_derivative_section_cursor;
+ } else {
+ ++current_derivative_section;
+ current_derivative_section_cursor = 0;
}
}
}
@@ -201,8 +246,6 @@
residuals[k] = output_jets[k].a;
}
}
-
- start_derivative_section = end_derivative_section;
}
return true;
}