Speed up AngleAxisRotatePoint.
This leads to a ~11% speedup on my mac using clang.
Based on suggestions by Thad Hughes.
Change-Id: If8495c8de6e28f63fb065e35bd6f382dd9fcbbea
diff --git a/include/ceres/rotation.h b/include/ceres/rotation.h
index ea0b769..e014e04 100644
--- a/include/ceres/rotation.h
+++ b/include/ceres/rotation.h
@@ -580,10 +580,6 @@
template<typename T> inline
void AngleAxisRotatePoint(const T angle_axis[3], const T pt[3], T result[3]) {
- T w[3];
- T sintheta;
- T costheta;
-
const T theta2 = DotProduct(angle_axis, angle_axis);
if (theta2 > 0.0) {
// Away from zero, use the rodriguez formula
@@ -597,19 +593,25 @@
// we get a division by zero.
//
const T theta = sqrt(theta2);
- w[0] = angle_axis[0] / theta;
- w[1] = angle_axis[1] / theta;
- w[2] = angle_axis[2] / theta;
- costheta = cos(theta);
- sintheta = sin(theta);
- T w_cross_pt[3];
- CrossProduct(w, pt, w_cross_pt);
- T w_dot_pt = DotProduct(w, pt);
- for (int i = 0; i < 3; ++i) {
- result[i] = pt[i] * costheta +
- w_cross_pt[i] * sintheta +
- w[i] * (T(1.0) - costheta) * w_dot_pt;
- }
+ const T costheta = cos(theta);
+ const T sintheta = sin(theta);
+ const T theta_inverse = 1.0 / theta;
+
+ const T w[3] = { angle_axis[0] * theta_inverse,
+ angle_axis[1] * theta_inverse,
+ angle_axis[2] * theta_inverse };
+
+ // Explicitly inlined evaluation of the cross product for
+ // performance reasons.
+ const T w_cross_pt[3] = { w[1] * pt[2] - w[2] * pt[1],
+ w[2] * pt[0] - w[0] * pt[2],
+ w[0] * pt[1] - w[1] * pt[0] };
+ const T tmp =
+ (w[0] * pt[0] + w[1] * pt[1] + w[2] * pt[2]) * (T(1.0) - costheta);
+
+ result[0] = pt[0] * costheta + w_cross_pt[0] * sintheta + w[0] * tmp;
+ result[1] = pt[1] * costheta + w_cross_pt[1] * sintheta + w[1] * tmp;
+ result[2] = pt[2] * costheta + w_cross_pt[2] * sintheta + w[2] * tmp;
} else {
// Near zero, the first order Taylor approximation of the rotation
// matrix R corresponding to a vector w and angle w is
@@ -625,11 +627,16 @@
//
// Switching to the Taylor expansion at zero helps avoid all sorts
// of numerical nastiness.
- T w_cross_pt[3];
- CrossProduct(angle_axis, pt, w_cross_pt);
- for (int i = 0; i < 3; ++i) {
- result[i] = pt[i] + w_cross_pt[i];
- }
+
+ // Explicitly inlined evaluation of the cross product for
+ // performance reasons.
+ const T w_cross_pt[3] = { angle_axis[1] * pt[2] - angle_axis[2] * pt[1],
+ angle_axis[2] * pt[0] - angle_axis[0] * pt[2],
+ angle_axis[0] * pt[1] - angle_axis[1] * pt[0] };
+
+ result[0] = pt[0] + w_cross_pt[0];
+ result[1] = pt[1] + w_cross_pt[1];
+ result[2] = pt[2] + w_cross_pt[2];
}
}