Fix DynamicAutoDiffCostFunction

Changed DynamicAutoDiffCostFunction to handle multiple derivative
sections as opposed to just a single contiguous block.

In the previous implementation it was assumed that non-constant
parameters occur in a single contiguous block so that constant
parameters could NOT lie between non-constant parameters. Previously,
start_derivative_section was first set as soon as the first
non-constant parameter block (marked by jacobians[i] != NULL) was
encountered. After this, entries in input_jets[parameter_cursor].v were
accessed with `parameter_cursor - start_derivative_section`. For
contiguous non-constant parameter blocks this is fine, but if constant
parameter blocks fall between then this indexing is incorrect because
`parameter_cursor - start_derivative_section` can go out of bounds.

For a concrete example, take a cost function with three parameter
blocks, each of size 1 and with the center block fixed. Assume that
Stride=1 so that two passes are required. On the first pass
start_derivative_section=0, and the first variable block is handled
correctly. At the end of the first pass end_derivative_section=1, so
for the second pass start_derivative_section=1. Now comes the problem.
When parameter_cursor=1, parameter_cursor >= start_derivative_section
so jacobian[1] is checked to be NULL. Since it is NULL (second
parameter block is constant) then nothing is done and
active_parameter_count is NOT incremented. Next, when
parameter_cursor=2, parameter_cursor >= start_derivative_section and
jacobian[2] is checked. Since it is not NULL then
input_jets[parameter_cursor].v[parameter_cursor -
start_derivative_section] is set to 1.0, BUT parameter_cursor -
start_derivative_section = 2 - 1 = 1 which is out of bounds
(input_jets[parameter_cursor].v is only of size Stride=1).

The proposed solution records the start of each contiguous block of
non-constant parameters and indexing into
input_jets[parameter_cursor].v is independent of parameter_cursor.

Change-Id: I388ab6a0bafa35d317491135ec6fe980453ff888
diff --git a/include/ceres/dynamic_autodiff_cost_function.h b/include/ceres/dynamic_autodiff_cost_function.h
index 38bdb0a..5d8f188 100644
--- a/include/ceres/dynamic_autodiff_cost_function.h
+++ b/include/ceres/dynamic_autodiff_cost_function.h
@@ -126,17 +126,28 @@
     vector<Jet<double, Stride>* > jet_parameters(num_parameter_blocks,
         static_cast<Jet<double, Stride>* >(NULL));
     int num_active_parameters = 0;
-    int start_derivative_section = -1;
-    for (int i = 0, parameter_cursor = 0; i < num_parameter_blocks; ++i) {
+
+    // To handle constant parameters between non-constant parameter blocks, the
+    // start position --- a raw parameter index --- of each contiguous block of
+    // non-constant parameters is recorded in start_derivative_section.
+    vector<int> start_derivative_section;
+    bool in_derivative_section = false;
+    int parameter_cursor = 0;
+
+    // Discover the derivative sections and set the parameter values.
+    for (int i = 0; i < num_parameter_blocks; ++i) {
       jet_parameters[i] = &input_jets[parameter_cursor];
 
       const int parameter_block_size = parameter_block_sizes()[i];
       if (jacobians[i] != NULL) {
-        start_derivative_section =
-            (start_derivative_section == -1)
-            ? parameter_cursor
-            : start_derivative_section;
+        if (!in_derivative_section) {
+          start_derivative_section.push_back(parameter_cursor);
+          in_derivative_section = true;
+        }
+
         num_active_parameters += parameter_block_size;
+      } else {
+        in_derivative_section = false;
       }
 
       for (int j = 0; j < parameter_block_size; ++j, parameter_cursor++) {
@@ -144,29 +155,54 @@
       }
     }
 
+    // When `num_active_parameters % Stride != 0` then it can be the case
+    // that `active_parameter_count < Stride` while parameter_cursor is less
+    // than the total number of parameters and with no remaining non-constant
+    // parameter blocks. Pushing parameter_cursor (the total number of
+    // parameters) as a final entry to start_derivative_section is required
+    // because if a constant parameter block is encountered after the
+    // last non-constant block then current_derivative_section is incremented
+    // and would otherwise index an invalid position in
+    // start_derivative_section. Setting the final element to the total number
+    // of parameters means that this can only happen at most once in the loop
+    // below.
+    start_derivative_section.push_back(parameter_cursor);
+
     // Evaluate all of the strides. Each stride is a chunk of the derivative to
     // evaluate, typically some size proportional to the size of the SIMD
     // registers of the CPU.
     int num_strides = static_cast<int>(ceil(num_active_parameters /
                                             static_cast<float>(Stride)));
 
+    int current_derivative_section = 0;
+    int current_derivative_section_cursor = 0;
+
     for (int pass = 0; pass < num_strides; ++pass) {
       // Set most of the jet components to zero, except for
       // non-constant #Stride parameters.
+      const int initial_derivative_section = current_derivative_section;
+      const int initial_derivative_section_cursor =
+        current_derivative_section_cursor;
+
       int active_parameter_count = 0;
-      int end_derivative_section = start_derivative_section;
-      for (int i = 0, parameter_cursor = 0; i < num_parameter_blocks; ++i) {
+      parameter_cursor = 0;
+
+      for (int i = 0; i < num_parameter_blocks; ++i) {
         for (int j = 0; j < parameter_block_sizes()[i];
              ++j, parameter_cursor++) {
           input_jets[parameter_cursor].v.setZero();
-          if (parameter_cursor >= start_derivative_section &&
-              active_parameter_count < Stride) {
+          if (active_parameter_count < Stride &&
+              parameter_cursor >= (
+                start_derivative_section[current_derivative_section] +
+                current_derivative_section_cursor)) {
             if (jacobians[i] != NULL) {
-              input_jets[parameter_cursor]
-                  .v[parameter_cursor - start_derivative_section] = 1.0;
+              input_jets[parameter_cursor].v[active_parameter_count] = 1.0;
               ++active_parameter_count;
+              ++current_derivative_section_cursor;
+            } else {
+              ++current_derivative_section;
+              current_derivative_section_cursor = 0;
             }
-            ++end_derivative_section;
           }
         }
       }
@@ -177,18 +213,27 @@
 
       // Copy the pieces of the jacobians into their final place.
       active_parameter_count = 0;
+
+      current_derivative_section = initial_derivative_section;
+      current_derivative_section_cursor = initial_derivative_section_cursor;
+
       for (int i = 0, parameter_cursor = 0; i < num_parameter_blocks; ++i) {
         for (int j = 0; j < parameter_block_sizes()[i];
              ++j, parameter_cursor++) {
-          if (parameter_cursor >= start_derivative_section &&
-              active_parameter_count < Stride) {
+          if (active_parameter_count < Stride &&
+              parameter_cursor >= (
+                start_derivative_section[current_derivative_section] +
+                current_derivative_section_cursor)) {
             if (jacobians[i] != NULL) {
               for (int k = 0; k < num_residuals(); ++k) {
                 jacobians[i][k * parameter_block_sizes()[i] + j] =
-                    output_jets[k].v[parameter_cursor -
-                                     start_derivative_section];
+                    output_jets[k].v[active_parameter_count];
               }
               ++active_parameter_count;
+              ++current_derivative_section_cursor;
+            } else {
+              ++current_derivative_section;
+              current_derivative_section_cursor = 0;
             }
           }
         }
@@ -201,8 +246,6 @@
           residuals[k] = output_jets[k].a;
         }
       }
-
-      start_derivative_section = end_derivative_section;
     }
     return true;
   }