Minor cleanups.

1. Further BLAS and heap allocation cleanups in schur_eliminator_impl.h
2. Modularize blas.h using macros.
3. Lint cleanups from William Rucklidge.
4. Small changes to jet.h
5. ResidualBlock now uses blas.h

Performance improvements:

For static and dynamically sized blocks, the performance is not changed much.

-use_quaternions -ordering user -linear_solver sparse_schur

                                        master         change
problem: 16-22106
gcc                                        3.4            3.3
clang                                      2.8            2.7

problem: 49-7776
gcc                                        1.7            1.7
clang                                      1.4            1.4

problem: 245-198739
gcc                                       80.1           79.6
clang                                     80.6           76.2

problem: 257-65132
gcc                                       12.2           12.0
clang                                     10.4           10.2

problem: 356-226730
gcc                                       99.0           96.8
clang                                     88.9           88.3

problem: 744-543562
gcc                                      361.5          356.2
clang                                    352.7          343.5

problem: 1024-110968
gcc                                       45.9           45.6
clang                                     42.6           42.1

However, performance when using local parameterizations is
significantly improved because residual_block.cc now uses blas.h.

-use_quaternions -use_local_parameterization -ordering user -linear_solver sparse_schur

                                        master         change
problem: 16-22106
gcc                                        3.6            3.3
clang                                      3.5            2.8

problem: 49-7776
gcc                                        1.8            1.6
clang                                      1.7            1.4

problem: 245-198739
gcc                                       79.7           76.1
clang                                     79.7           73.0

problem: 257-65132
gcc                                       12.8           11.9
clang                                     12.3            9.8

problem: 356-226730
gcc                                      101.9           93.5
clang                                    105.0           86.8

problem: 744-543562
gcc                                      367.9          350.5
clang                                    355.3          323.1

problem: 1024-110968
gcc                                       43.0           40.3
clang                                     41.0           37.5

Change-Id: I6dcf7476ddaa77cb116558d112a9cf1e832f5fc9
diff --git a/include/ceres/jet.h b/include/ceres/jet.h
index 1238123..000bd1c 100644
--- a/include/ceres/jet.h
+++ b/include/ceres/jet.h
@@ -348,8 +348,8 @@
   //   b + v   (b + v)(b - v)        b^2
   //
   // which holds because v*v = 0.
-  h.a = f.a / g.a;
-  const T g_a_inverse = 1.0 / g.a;
+  const T g_a_inverse = T(1.0) / g.a;
+  h.a = f.a * g_a_inverse;
   const T f_a_by_g_a = f.a * g_a_inverse;
   for (int i = 0; i < N; ++i) {
     h.v[i] = (f.v[i] - f_a_by_g_a * g.v[i]) * g_a_inverse;
@@ -450,7 +450,7 @@
 Jet<T, N> sqrt(const Jet<T, N>& f) {
   Jet<T, N> g;
   g.a = sqrt(f.a);
-  const T two_a_inverse = 1.0 / (T(2.0) * g.a);
+  const T two_a_inverse = T(1.0) / (T(2.0) * g.a);
   g.v = f.v * two_a_inverse;
   return g;
 }